/*
 * netsniff-ng - the packet sniffing beast
 * Copyright 2009, 2010 Daniel Borkmann.
 * Copyright 2014, 2015 Tobias Klauser.
 * Subject to the GPL, version 2.
 */

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/if_ether.h>

#include "xmalloc.h"
#include "die.h"
#include "ring_rx.h"
#include "built_in.h"

/*
 * tpacket v3 data structures and constants are not available for older kernel
 * versions which only support tpacket v2, thus we need to protect access to
 * them.
 */
#ifdef HAVE_TPACKET3
/* Tell whether get_sockopt_tpacket() reports TPACKET_V3 for the socket. */
static inline bool is_tpacket_v3(int sock)
{
	if (get_sockopt_tpacket(sock) == TPACKET_V3)
		return true;

	return false;
}

/*
 * Size in bytes of the layout member that matches the tpacket version:
 * ring->layout3 when v3 is set, ring->layout otherwise.
 */
static inline size_t get_ring_layout_size(struct ring *ring, bool v3)
{
	if (v3)
		return sizeof(ring->layout3);

	return sizeof(ring->layout);
}

/*
 * Fill in the members of ring->layout3 that only exist for tpacket v3.
 *
 * @ring: ring whose v3 layout is completed
 */
static inline void setup_rx_ring_layout_v3(struct ring *ring)
{
	/*
	 * Code in this file reads ring->layout members even for v3 rings, so
	 * struct tpacket_req3 has to start with exactly the members of struct
	 * tpacket_req, directly followed by the v3-only ones. Break the build
	 * if *either* of the two assumptions stops holding.
	 */
	build_bug_on(offsetof(struct tpacket_req, tp_frame_nr) !=
		     offsetof(struct tpacket_req3, tp_frame_nr) ||
		     sizeof(struct tpacket_req) !=
		     offsetof(struct tpacket_req3, tp_retire_blk_tov));

	/*
	 * Block retire timeout (msecs according to <linux/if_packet.h>);
	 * a value of 0 would let the kernel decide.
	 */
	ring->layout3.tp_retire_blk_tov = 100;
	ring->layout3.tp_sizeof_priv = 0;
	ring->layout3.tp_feature_req_word = 0;
}

/*
 * Number of ring entries for the given tpacket version: the block count for
 * v3, the frame count otherwise.
 *
 * Both kernel fields are unsigned, so the result is returned as size_t (as
 * the !HAVE_TPACKET3 variant of this helper and rx_ring_get_size() do)
 * instead of being narrowed to a signed int.
 */
static inline size_t rx_ring_get_num(struct ring *ring, bool v3)
{
	return v3 ? ring->layout3.tp_block_nr : ring->layout.tp_frame_nr;
}

/*
 * Size of one ring entry for the given tpacket version: the block size for
 * v3, the frame size otherwise.
 */
static inline size_t rx_ring_get_size(struct ring *ring, bool v3)
{
	if (v3)
		return ring->layout3.tp_block_size;

	return ring->layout.tp_frame_size;
}

/*
 * Query the PACKET_STATISTICS counters of a packet socket.
 *
 * @sock:    socket to query
 * @packets: receives the tp_packets counter on success
 * @drops:   receives the tp_drops counter on success
 * @v3:      selects the size of the statistics structure handed to the kernel
 *
 * Returns the getsockopt(2) result; *packets and *drops are left untouched
 * unless it is 0.
 */
int get_rx_net_stats(int sock, uint64_t *packets, uint64_t *drops, bool v3)
{
	union {
		struct tpacket_stats	k2;
		struct tpacket_stats_v3 k3;
	} counters;
	socklen_t len;
	int rc;

	if (v3)
		len = sizeof(counters.k3);
	else
		len = sizeof(counters.k2);

	memset(&counters, 0, sizeof(counters));

	rc = getsockopt(sock, SOL_PACKET, PACKET_STATISTICS, &counters, &len);
	if (rc != 0)
		return rc;

	/*
	 * The counters are read through the v3 view for both versions; this
	 * relies on both structures starting with tp_packets and tp_drops.
	 */
	*packets = counters.k3.tp_packets;
	*drops = counters.k3.tp_drops;

	return rc;
}
#else
/*
 * Fallback used when HAVE_TPACKET3 is not defined: the socket is never
 * reported as tpacket v3, the argument is ignored.
 */
static inline bool is_tpacket_v3(int sock __maybe_unused)
{
	return false;
}

/*
 * Fallback used when HAVE_TPACKET3 is not defined: only ring->layout exists,
 * so its size is returned regardless of v3.
 */
static inline size_t get_ring_layout_size(struct ring *ring, bool v3 __maybe_unused)
{
	const size_t layout_len = sizeof(ring->layout);

	return layout_len;
}

/*
 * Fallback used when HAVE_TPACKET3 is not defined: there are no v3 layout
 * members to fill in, so this is a no-op.
 */
static inline void setup_rx_ring_layout_v3(struct ring *ring __maybe_unused)
{
}

/*
 * Fallback used when HAVE_TPACKET3 is not defined: the number of ring entries
 * is always the frame count of ring->layout; v3 is ignored.
 */
static inline size_t rx_ring_get_num(struct ring *ring, bool v3 __maybe_unused)
{
	const size_t frame_count = ring->layout.tp_frame_nr;

	return frame_count;
}

/*
 * Fallback used when HAVE_TPACKET3 is not defined: the size of a ring entry
 * is always the frame size of ring->layout; v3 is ignored.
 */
static inline size_t rx_ring_get_size(struct ring *ring, bool v3 __maybe_unused)
{
	const size_t frame_bytes = ring->layout.tp_frame_size;

	return frame_bytes;
}

/*
 * Query the PACKET_STATISTICS counters of a packet socket (variant built
 * when HAVE_TPACKET3 is not defined; v3 is ignored).
 *
 * @sock:    socket to query
 * @packets: receives the tp_packets counter on success
 * @drops:   receives the tp_drops counter on success
 *
 * Returns the getsockopt(2) result; *packets and *drops are left untouched
 * unless it is 0.
 */
int get_rx_net_stats(int sock, uint64_t *packets, uint64_t *drops,
		     bool v3 __maybe_unused)
{
	struct tpacket_stats counters;
	socklen_t len = sizeof(counters);
	int rc;

	memset(&counters, 0, sizeof(counters));

	rc = getsockopt(sock, SOL_PACKET, PACKET_STATISTICS, &counters, &len);
	if (rc != 0)
		return rc;

	*packets = counters.tp_packets;
	*drops = counters.tp_drops;

	return rc;
}
#endif /* HAVE_TPACKET3 */

/*
 * Release the resources of an RX ring.
 *
 * @sock: packet socket the ring belongs to
 * @ring: ring to tear down
 *
 * The mapping is unmapped, ring->mm_len is reset and the frame array is
 * freed. For sockets that are not tpacket v3, a zeroed PACKET_RX_RING request
 * is issued in addition; if that request fails, panic() is called.
 */
void destroy_rx_ring(int sock, struct ring *ring)
{
	const bool is_v3 = is_tpacket_v3(sock);

	munmap(ring->mm_space, ring->mm_len);
	ring->mm_len = 0;

	xfree(ring->frames);

	/* In general, this is freed during close(2) anyway. */
	if (!is_v3) {
		int rc;

		fmemset(&ring->layout, 0, sizeof(ring->layout));
		rc = setsockopt(sock, SOL_PACKET, PACKET_RX_RING,
				&ring->layout, sizeof(ring->layout));
		if (unlikely(rc))
			panic("Cannot destroy the RX_RING: %s!\n", strerror(errno));
	}
}

/*
 * Prepare the ring layout and select the tpacket version on the socket.
 *
 * The generic layout is computed first. Then either tpacket v2 is selected,
 * or the v3-only layout members are filled in and tpacket v3 is selected.
 * Finally the resulting layout is handed to ring_verify_layout().
 */
static void setup_rx_ring_layout(int sock, struct ring *ring, size_t size,
				 bool jumbo_support, bool v3)
{
	setup_ring_layout_generic(sock, ring, size, jumbo_support);

	if (!v3) {
		set_sockopt_tpacket_v2(sock);
	} else {
		setup_rx_ring_layout_v3(ring);
		set_sockopt_tpacket_v3(sock);
	}

	ring_verify_layout(ring);
}

/*
 * Request the RX ring described by the ring layout from the kernel and
 * record the length of the resulting mapping in ring->mm_len.
 *
 * @sock:    packet socket the ring is attached to
 * @ring:    ring whose layout has already been set up
 * @verbose: print the allocated ring geometry to stdout
 *
 * When the request fails with ENOMEM while more than one block is still
 * requested, shrink_ring_layout_generic() is asked to adjust the layout and
 * the request is retried. Any other failure is reported through panic().
 */
static void create_rx_ring(int sock, struct ring *ring, bool verbose)
{
	int ret;
	bool v3 = is_tpacket_v3(sock);
	size_t layout_size = get_ring_layout_size(ring, v3);

retry:
	ret = setsockopt(sock, SOL_PACKET, PACKET_RX_RING, &ring->raw,
			 layout_size);

	/*
	 * errno is only meaningful after a failed call: a successful
	 * setsockopt(2) leaves whatever value an earlier call stored there,
	 * so the ENOMEM test has to be guarded by the return value.
	 */
	if (ret < 0 && errno == ENOMEM && ring->layout.tp_block_nr > 1) {
		shrink_ring_layout_generic(ring);
		goto retry;
	}
	if (ret < 0)
		panic("Cannot allocate RX_RING!\n");

	/* Widen before multiplying so the product is computed as size_t. */
	ring->mm_len = (size_t) ring->layout.tp_block_size * ring->layout.tp_block_nr;

	if (verbose) {
		if (!v3) {
			printf("RX,V2: %.2Lf MiB, %u Frames, each %u Byte allocated\n",
			       (long double) ring->mm_len / (1 << 20),
			       ring->layout.tp_frame_nr, ring->layout.tp_frame_size);
		} else {
			printf("RX,V3: %.2Lf MiB, %u Blocks, each %u Byte allocated\n",
			       (long double) ring->mm_len / (1 << 20),
			       ring->layout.tp_block_nr, ring->layout.tp_block_size);
		}
	}
}

/*
 * Allocate the frame array of the ring, using the entry count and entry size
 * that match the tpacket version currently reported for the socket.
 */
static void alloc_rx_ring_frames(int sock, struct ring *ring)
{
	const bool use_v3 = is_tpacket_v3(sock);

	alloc_ring_frames_generic(ring,
				  rx_ring_get_num(ring, use_v3),
				  rx_ring_get_size(ring, use_v3));
}

/*
 * Join a packet fanout group on the socket.
 *
 * @sock:         packet socket
 * @fanout_group: group id; only its low 16 bits are used, and a value of 0
 *                makes this function return without touching the socket
 * @fanout_type:  value placed in the bits above the group id
 *
 * Calls panic() if the PACKET_FANOUT socket option cannot be set.
 */
void join_fanout_group(int sock, uint32_t fanout_group, uint32_t fanout_type)
{
	uint32_t opt;

	if (!fanout_group)
		return;

	/* Group id in the low half, type shifted into the high half. */
	opt = (fanout_type << 16) | (fanout_group & 0xffff);

	if (setsockopt(sock, SOL_PACKET, PACKET_FANOUT, &opt, sizeof(opt)) < 0)
		panic("Cannot set fanout ring mode!\n");
}

/*
 * Set up a complete RX ring on a packet socket.
 *
 * @ring:          ring descriptor; zeroed first, then filled in
 * @sock:          packet socket to attach the ring to
 * @size:          requested ring size, passed on to the layout setup
 * @ifindex:       interface index the ring is bound to
 * @poll:          poll descriptor prepared for the socket
 * @v3:            select tpacket v3 (true) or tpacket v2 (false)
 * @jumbo_support: passed on to the layout setup
 * @verbose:       print the allocated ring geometry
 * @fanout_group:  fanout group id, 0 to skip joining a fanout group
 * @fanout_type:   fanout type used when joining a group
 *
 * The steps below depend on each other and must stay in this order.
 */
void ring_rx_setup(struct ring *ring, int sock, size_t size, int ifindex,
		   struct pollfd *poll, bool v3, bool jumbo_support,
		   bool verbose, uint32_t fanout_group, uint32_t fanout_type)
{
	/* Start from an all-zero ring descriptor. */
	fmemset(ring, 0, sizeof(*ring));
	/* Computes the layout and selects the tpacket version on the socket. */
	setup_rx_ring_layout(sock, ring, size, jumbo_support, v3);
	/* From here on the helpers query the tpacket version from the socket. */
	create_rx_ring(sock, ring, verbose);
	mmap_ring_generic(sock, ring);
	alloc_rx_ring_frames(sock, ring);
	bind_ring_generic(sock, ring, ifindex, false);
	/* Returns without doing anything when fanout_group is 0. */
	join_fanout_group(sock, fanout_group, fanout_type);
	prepare_polling(sock, poll);
}
io</a></td></tr>
<tr><th>parent</th><td colspan='2' class='oid'><a href='/cgit.cgi/linux/net-next.git/commit/Documentation/devicetree/bindings/iio?id=92d21ac74a9e3c09b0b01c764e530657e4c85c49'>92d21ac74a9e3c09b0b01c764e530657e4c85c49</a> (<a href='/cgit.cgi/linux/net-next.git/diff/Documentation/devicetree/bindings/iio?id=930c532869774ebf8af9efe9484c597f896a7d46&amp;id2=92d21ac74a9e3c09b0b01c764e530657e4c85c49'>diff</a>)</td></tr></table>
<div class='commit-subject'>libceph: apply new_state before new_up_client on incrementals</div><div class='commit-msg'>Currently, osd_weight and osd_state fields are updated in the encoding
order.  This is wrong, because an incremental map may look like e.g.

    new_up_client: { osd=6, addr=... } # set osd_state and addr
    new_state: { osd=6, xorstate=EXISTS } # clear osd_state

Suppose osd6's current osd_state is EXISTS (i.e. osd6 is down).  After
applying new_up_client, osd_state is changed to EXISTS | UP.  Carrying
on with the new_state update, we flip EXISTS and leave osd6 in a weird
"!EXISTS but UP" state.  A non-existent OSD is considered down by the
mapping code

2087    for (i = 0; i &lt; pg-&gt;pg_temp.len; i++) {
2088            if (ceph_osd_is_down(osdmap, pg-&gt;pg_temp.osds[i])) {
2089                    if (ceph_can_shift_osds(pi))
2090                            continue;
2091
2092                    temp-&gt;osds[temp-&gt;size++] = CRUSH_ITEM_NONE;

and so requests get directed to the second OSD in the set instead of
the first, resulting in OSD-side errors like:

[WRN] : client.4239 192.168.122.21:0/2444980242 misdirected client.4239.1:2827 pg 2.5df899f2 to osd.4 not [1,4,6] in e680/680

and hung rbds on the client:

[  493.566367] rbd: rbd0: write 400000 at 11cc00000 (0)
[  493.566805] rbd: rbd0:   result -6 xferred 400000
[  493.567011] blk_update_request: I/O error, dev rbd0, sector 9330688

The fix is to decouple application from the decoding and:
- apply new_weight first
- apply new_state before new_up_client
- twiddle osd_state flags if marking in
- clear out some of the state if osd is destroyed

Fixes: http://tracker.ceph.com/issues/14901

Cc: stable@vger.kernel.org # 3.15+: 6dd74e44dc1d: libceph: set 'exists' flag for newly up osd
Cc: stable@vger.kernel.org # 3.15+
Signed-off-by: Ilya Dryomov &lt;idryomov@gmail.com&gt;
Reviewed-by: Josh Durgin &lt;jdurgin@redhat.com&gt;
</div><div class='diffstat-header'><a href='/cgit.cgi/linux/net-next.git/diff/?id=930c532869774ebf8af9efe9484c597f896a7d46'>Diffstat</a> (limited to 'Documentation/devicetree/bindings/iio')</div><table summary='diffstat' class='diffstat'>