/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * State machine for state 1, Awaiting Connection State. * The handling of the timer(s) is in file ax25_ds_timer.c. * Handling of state 0 and connection release is in ax25.c. */ static int ax25_ds_state1_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int pf, int type) { switch (frametype) { case AX25_SABM: ax25->modulus = AX25_MODULUS; ax25->window = ax25->ax25_dev->values[AX25_VALUES_WINDOW]; ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE); break; case AX25_SABME: ax25->modulus = AX25_EMODULUS; ax25->window = ax25->ax25_dev->values[AX25_VALUES_EWINDOW]; ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE); break; case AX25_DISC: ax25_send_control(ax25, AX25_DM, pf, AX25_RESPONSE); break; case AX25_UA: ax25_calculate_rtt(ax25); ax25_stop_t1timer(ax25); ax25_start_t3timer(ax25); ax25_start_idletimer(ax25); ax25->vs = 0; ax25->va = 0; ax25->vr = 0; ax25->state = AX25_STATE_3; ax25->n2count = 0; if (ax25->sk != NULL) { bh_lock_sock(ax25->sk); ax25->sk->sk_state = TCP_ESTABLISHED; /* * For WAIT_SABM connections we will produce an accept * ready socket here */ if (!sock_flag(ax25->sk, SOCK_DEAD)) ax25->sk->sk_state_change(ax25->sk); bh_unlock_sock(ax25->sk); } ax25_dama_on(ax25); /* according to DK4EG's spec we are required to * send a RR RESPONSE FINAL NR=0. */ ax25_std_enquiry_response(ax25); break; case AX25_DM: if (pf) ax25_disconnect(ax25, ECONNREFUSED); break; default: if (pf) ax25_send_control(ax25, AX25_SABM, AX25_POLLON, AX25_COMMAND); break; } return 0; } /* * State machine for state 2, Awaiting Release State. * The handling of the timer(s) is in file ax25_ds_timer.c * Handling of state 0 and connection release is in ax25.c. */ static int ax25_ds_state2_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int pf, int type) { switch (frametype) { case AX25_SABM: case AX25_SABME: ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); ax25_dama_off(ax25); break; case AX25_DISC: ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE); ax25_dama_off(ax25); ax25_disconnect(ax25, 0); break; case AX25_DM: case AX25_UA: if (pf) { ax25_dama_off(ax25); ax25_disconnect(ax25, 0); } break; case AX25_I: case AX25_REJ: case AX25_RNR: case AX25_RR: if (pf) { ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); ax25_dama_off(ax25); } break; default: break; } return 0; } /* * State machine for state 3, Connected State. * The handling of the timer(s) is in file ax25_timer.c * Handling of state 0 and connection release is in ax25.c. */ static int ax25_ds_state3_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int ns, int nr, int pf, int type) { int queued = 0; switch (frametype) { case AX25_SABM: case AX25_SABME: if (frametype == AX25_SABM) { ax25->modulus = AX25_MODULUS; ax25->window = ax25->ax25_dev->values[AX25_VALUES_WINDOW]; } else { ax25->modulus = AX25_EMODULUS; ax25->window = ax25->ax25_dev->values[AX25_VALUES_EWINDOW]; } ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE); ax25_stop_t1timer(ax25); ax25_start_t3timer(ax25); ax25_start_idletimer(ax25); ax25->condition = 0x00; ax25->vs = 0; ax25->va = 0; ax25->vr = 0; ax25_requeue_frames(ax25); ax25_dama_on(ax25); break; case AX25_DISC: ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE); ax25_dama_off(ax25); ax25_disconnect(ax25, 0); break; case AX25_DM: ax25_dama_off(ax25); ax25_disconnect(ax25, ECONNRESET); break; case AX25_RR: case AX25_RNR: if (frametype == AX25_RR) ax25->condition &= ~AX25_COND_PEER_RX_BUSY; else ax25->condition |= AX25_COND_PEER_RX_BUSY; if (ax25_validate_nr(ax25, nr)) { if (ax25_check_iframes_acked(ax25, nr)) ax25->n2count=0; if (type == AX25_COMMAND && pf) ax25_ds_enquiry_response(ax25); } else { ax25_ds_nr_error_recovery(ax25); ax25->state = AX25_STATE_1; } break; case AX25_REJ: ax25->condition &= ~AX25_COND_PEER_RX_BUSY; if (ax25_validate_nr(ax25, nr)) { if (ax25->va != nr) ax25->n2count=0; ax25_frames_acked(ax25, nr); ax25_calculate_rtt(ax25); ax25_stop_t1timer(ax25); ax25_start_t3timer(ax25); ax25_requeue_frames(ax25); if (type == AX25_COMMAND && pf) ax25_ds_enquiry_response(ax25); } else { ax25_ds_nr_error_recovery(ax25); ax25->state = AX25_STATE_1; } break; case AX25_I: if (!ax25_validate_nr(ax25, nr)) { ax25_ds_nr_error_recovery(ax25); ax25->state = AX25_STATE_1; break; } if (ax25->condition & AX25_COND_PEER_RX_BUSY) { ax25_frames_acked(ax25, nr); ax25->n2count = 0; } else { if (ax25_check_iframes_acked(ax25, nr)) ax25->n2count = 0; } if (ax25->condition & AX25_COND_OWN_RX_BUSY) { if (pf) ax25_ds_enquiry_response(ax25); break; } if (ns == ax25->vr) { ax25->vr = (ax25->vr + 1) % ax25->modulus; queued = ax25_rx_iframe(ax25, skb); if (ax25->condition & AX25_COND_OWN_RX_BUSY) ax25->vr = ns; /* ax25->vr - 1 */ ax25->condition &= ~AX25_COND_REJECT; if (pf) { ax25_ds_enquiry_response(ax25); } else { if (!(ax25->condition & AX25_COND_ACK_PENDING)) { ax25->condition |= AX25_COND_ACK_PENDING; ax25_start_t2timer(ax25); } } } else { if (ax25->condition & AX25_COND_REJECT) { if (pf) ax25_ds_enquiry_response(ax25); } else { ax25->condition |= AX25_COND_REJECT; ax25_ds_enquiry_response(ax25); ax25->condition &= ~AX25_COND_ACK_PENDING; } } break; case AX25_FRMR: case AX25_ILLEGAL: ax25_ds_establish_data_link(ax25); ax25->state = AX25_STATE_1; break; default: break; } return queued; } /* * Higher level upcall for a LAPB frame */ int ax25_ds_frame_in(ax25_cb *ax25, struct sk_buff *skb, int type) { int queued = 0, frametype, ns, nr, pf; frametype = ax25_decode(ax25, skb, &ns, &nr, &pf); switch (ax25->state) { case AX25_STATE_1: queued = ax25_ds_state1_machine(ax25, skb, frametype, pf, type); break; case AX25_STATE_2: queued = ax25_ds_state2_machine(ax25, skb, frametype, pf, type); break; case AX25_STATE_3: queued = ax25_ds_state3_machine(ax25, skb, frametype, ns, nr, pf, type); break; } return queued; } skip sending of the notification in the fib code that actually inserts the route. Once the full route has been added, a notification is generated with all nexthops. ip6_route_multipath_add handles 3 use cases: new routes, route replace, and route append. The multipath notification generated needs to be consistent with the order of the nexthops and it should be consistent with the order in a FIB dump which means the route with the first nexthop needs to be used as the route reference. For the first 2 cases (new and replace), a reference to the route used to send the notification is obtained by saving the first route added. For the append case, the last route added is used to loop back to its first sibling route which is the first nexthop in the multipath route. Signed-off-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-04net: ipv6: Add support to dump multipath routes via RTA_MULTIPATH attributeDavid Ahern2-17/+105 IPv6 returns multipath routes as a series of individual routes making their display and handling by userspace different and more complicated than IPv4, putting the burden on the user to see that a route is part of a multipath route and internally creating a multipath route if desired (e.g., libnl does this as of commit 29b71371e764). This patch addresses this difference, allowing multipath routes to be returned using the RTA_MULTIPATH attribute. The end result is that IPv6 multipath routes can be treated and displayed in a format similar to IPv4: $ ip -6 ro ls vrf red 2001:db8:1::/120 dev eth1 proto kernel metric 256 pref medium 2001:db8:2::/120 dev eth2 proto kernel metric 256 pref medium 2001:db8:200::/120 metric 1024 nexthop via 2001:db8:1::2 dev eth1 weight 1 nexthop via 2001:db8:2::2 dev eth2 weight 1 Signed-off-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-04net: ipv6: Allow shorthand delete of all nexthops in multipath routeDavid Ahern1-2/+36 IPv4 allows multipath routes to be deleted using just the prefix and length. For example: $ ip ro ls vrf red unreachable default metric 8192 1.1.1.0/24 nexthop via 10.100.1.254 dev eth1 weight 1 nexthop via 10.11.200.2 dev eth11.200 weight 1 10.11.200.0/24 dev eth11.200 proto kernel scope link src 10.11.200.3 10.100.1.0/24 dev eth1 proto kernel scope link src 10.100.1.3 $ ip ro del 1.1.1.0/24 vrf red $ ip ro ls vrf red unreachable default metric 8192 10.11.200.0/24 dev eth11.200 proto kernel scope link src 10.11.200.3 10.100.1.0/24 dev eth1 proto kernel scope link src 10.100.1.3 The same notation does not work with IPv6 because of how multipath routes are implemented for IPv6. For IPv6 only the first nexthop of a multipath route is deleted if the request contains only a prefix and length. This leads to unnecessary complexity in userspace dealing with IPv6 multipath routes. This patch allows all nexthops to be deleted without specifying each one in the delete request. Internally, this is done by walking the sibling list of the route matching the specifications given (prefix, length, metric, protocol, etc). $ ip -6 ro ls vrf red 2001:db8:1::/120 dev eth1 proto kernel metric 256 pref medium 2001:db8:2::/120 dev eth2 proto kernel metric 256 pref medium 2001:db8:200::/120 via 2001:db8:1::2 dev eth1 metric 1024 pref medium 2001:db8:200::/120 via 2001:db8:2::2 dev eth2 metric 1024 pref medium ... $ ip -6 ro del vrf red 2001:db8:200::/120 $ ip -6 ro ls vrf red 2001:db8:1::/120 dev eth1 proto kernel metric 256 pref medium 2001:db8:2::/120 dev eth2 proto kernel metric 256 pref medium ... Because IPv6 allows individual nexthops to be deleted without deleting the entire route, the ip6_route_multipath_del and non-multipath code path (ip6_route_del) have to be discriminated so that all nexthops are only deleted for the latter case. This is done by making the existing fc_type in fib6_config a u16 and then adding a new u16 field with fc_delete_all_nh as the first bit. Suggested-by: Dinesh Dutt <ddutt@cumulusnetworks.com> Signed-off-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-03net: skb_needs_check() accepts CHECKSUM_NONE for txEric Dumazet1-3/+4 My recent change missed fact that UFO would perform a complete UDP checksum before segmenting in frags. In this case skb->ip_summed is set to CHECKSUM_NONE. We need to add this valid case to skb_needs_check() Fixes: b2504a5dbef3 ("net: reduce skb_warn_bad_offload() noise") Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Willem de Bruijn <willemb@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-03net: remove support for per driver ndo_busy_poll()Eric Dumazet2-16/+0 We added generic support for busy polling in NAPI layer in linux-4.5 No network driver uses ndo_busy_poll() anymore, we can get rid of the pointer in struct net_device_ops, and its use in sk_busy_loop() Saves NETIF_F_BUSY_POLL features bit. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-03Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-nextDavid S. Miller53-640/+576 Pablo Neira Ayuso says: ==================== Netfilter updates for net-next The following patchset contains Netfilter updates for your net-next tree, they are: 1) Stash ctinfo 3-bit field into pointer to nf_conntrack object from sk_buff so we only access one single cacheline in the conntrack hotpath. Patchset from Florian Westphal. 2) Don't leak pointer to internal structures when exporting x_tables ruleset back to userspace, from Willem DeBruijn. This includes new helper functions to copy data to userspace such as xt_data_to_user() as well as conversions of our ip_tables, ip6_tables and arp_tables clients to use it. Not surprinsingly, ebtables requires an ad-hoc update. There is also a new field in x_tables extensions to indicate the amount of bytes that we copy to userspace. 3) Add nf_log_all_netns sysctl: This new knob allows you to enable logging via nf_log infrastructure for all existing netnamespaces. Given the effort to provide pernet syslog has been discontinued, let's provide a way to restore logging using netfilter kernel logging facilities in trusted environments. Patch from Michal Kubecek. 4) Validate SCTP checksum from conntrack helper, from Davide Caratti. 5) Merge UDPlite conntrack and NAT helpers into UDP, this was mostly a copy&paste from the original helper, from Florian Westphal. 6) Reset netfilter state when duplicating packets, also from Florian. 7) Remove unnecessary check for broadcast in IPv6 in pkttype match and nft_meta, from Liping Zhang. 8) Add missing code to deal with loopback packets from nft_meta when used by the netdev family, also from Liping. 9) Several cleanups on nf_tables, one to remove unnecessary check from the netlink control plane path to add table, set and stateful objects and code consolidation when unregister chain hooks, from Gao Feng. 10) Fix harmless reference counter underflow in IPVS that, however, results in problems with the introduction of the new refcount_t type, from David Windsor. 11) Enable LIBCRC32C from nf_ct_sctp instead of nf_nat_sctp, from Davide Caratti. 12) Missing documentation on nf_tables uapi header, from Liping Zhang. 13) Use rb_entry() helper in xt_connlimit, from Geliang Tang. ==================== Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-03sched: cls_flower: expose priority to offloading netdeviceJiri Pirko1-0/+3 The driver that offloads flower rules needs to know with which priority user inserted the rules. So add this information into offload struct. Signed-off-by: Jiri Pirko <jiri@mellanox.com> Acked-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-03tcp: clear pfmemalloc on outgoing skbEric Dumazet1-0/+7 Josef Bacik diagnosed following problem : I was seeing random disconnects while testing NBD over loopback. This turned out to be because NBD sets pfmemalloc on it's socket, however the receiving side is a user space application so does not have pfmemalloc set on its socket. This means that sk_filter_trim_cap will simply drop this packet, under the assumption that the other side will simply retransmit. Well we do retransmit, and then the packet is just dropped again for the same reason. It seems the better way to address this problem is to clear pfmemalloc in the TCP transmit path. pfmemalloc strict control really makes sense on the receive path. Signed-off-by: Eric Dumazet <edumazet@google.com> Acked-by: Josef Bacik <jbacik@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-03net: ipv6: Set protocol to kernel for local routesDavid Ahern1-0/+1 IPv6 stack does not set the protocol for local routes, so those routes show up with proto "none": $ ip -6 ro ls table local local ::1 dev lo proto none metric 0 pref medium local 2100:3:: dev lo proto none metric 0 pref medium local 2100:3::4 dev lo proto none metric 0 pref medium local fe80:: dev lo proto none metric 0 pref medium ... Set rt6i_protocol to RTPROT_KERNEL for consistency with IPv4. Now routes show up with proto "kernel": $ ip -6 ro ls table local local ::1 dev lo proto kernel metric 0 pref medium local 2100:3:: dev lo proto kernel metric 0 pref medium local 2100:3::4 dev lo proto kernel metric 0 pref medium local fe80:: dev lo proto kernel metric 0 pref medium ... Signed-off-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-03bridge: vlan dst_metadata hooks in ingress and egress pathsRoopa Prabhu6-2/+82 - ingress hook: - if port is a tunnel port, use tunnel info in attached dst_metadata to map it to a local vlan - egress hook: - if port is a tunnel port, use tunnel info attached to vlan to set dst_metadata on the skb CC: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-03bridge: per vlan dst_metadata netlink supportRoopa Prabhu7-48/+641 This patch adds support to attach per vlan tunnel info dst metadata. This enables bridge driver to map vlan to tunnel_info at ingress and egress. It uses the kernel dst_metadata infrastructure. The initial use case is vlan to vni bridging, but the api is generic to extend to any tunnel_info in the future: - Uapi to configure/unconfigure/dump per vlan tunnel data - netlink functions to configure vlan and tunnel_info mapping - Introduces bridge port flag BR_LWT_VLAN to enable attach/detach dst_metadata to bridged packets on ports. off by default. - changes to existing code is mainly refactor some existing vlan handling netlink code + hooks for new vlan tunnel code - I have kept the vlan tunnel code isolated in separate files. - most of the netlink vlan tunnel code is handling of vlan-tunid ranges (follows the vlan range handling code). To conserve space vlan-tunid by default are always dumped in ranges if applicable. Use case: example use for this is a vxlan bridging gateway or vtep which maps vlans to vn-segments (or vnis). iproute2 example (patched and pruned iproute2 output to just show relevant fdb entries): example shows same host mac learnt on two vni's and vlan 100 maps to vni 1000, vlan 101 maps to vni 1001 before (netdev per vni): $bridge fdb show | grep "00:02:00:00:00:03" 00:02:00:00:00:03 dev vxlan1001 vlan 101 master bridge 00:02:00:00:00:03 dev vxlan1001 dst 12.0.0.8 self 00:02:00:00:00:03 dev vxlan1000 vlan 100 master bridge 00:02:00:00:00:03 dev vxlan1000 dst 12.0.0.8 self after this patch with collect metdata in bridged mode (single netdev): $bridge fdb show | grep "00:02:00:00:00:03" 00:02:00:00:00:03 dev vxlan0 vlan 101 master bridge 00:02:00:00:00:03 dev vxlan0 src_vni 1001 dst 12.0.0.8 self 00:02:00:00:00:03 dev vxlan0 vlan 100 master bridge 00:02:00:00:00:03 dev vxlan0 src_vni 1000 dst 12.0.0.8 self CC: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-03net/sched: act_ife: Change to use ife moduleYotam Gigi2-78/+33 Use the encode/decode functionality from the ife module instead of using implementation inside the act_ife. Reviewed-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: Yotam Gigi <yotamg@mellanox.com> Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: Roman Mashak <mrv@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-03net: Introduce ife encapsulation moduleYotam Gigi5-0/+165 This module is responsible for the ife encapsulation protocol encode/decode logics. That module can: - ife_encode: encode skb and reserve space for the ife meta header - ife_decode: decode skb and extract the meta header size - ife_tlv_meta_encode - encodes one tlv entry into the reserved ife header space. - ife_tlv_meta_decode - decodes one tlv entry from the packet - ife_tlv_meta_next - advance to the next tlv Reviewed-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: Yotam Gigi <yotamg@mellanox.com> Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: Roman Mashak <mrv@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-03net/sched: act_ife: Unexport ife_tlv_meta_encodeYotam Gigi1-2/+2 As the function ife_tlv_meta_encode is not used by any other module, unexport it and make it static for the act_ife module. Signed-off-by: Yotam Gigi <yotamg@mellanox.com> Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: Roman Mashak <mrv@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-03tcp: add tcp_mss_clamp() helperEric Dumazet