summary refs log tree commit diff
path: root/ring_tx.h
blob: adfe2a4362ed70025202e2d8fcc88982489f14c6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
/*
 * netsniff-ng - the packet sniffing beast
 * Copyright 2009, 2010 Daniel Borkmann.
 * Subject to the GPL, version 2.
 */

#ifndef TX_RING_H
#define TX_RING_H

#include <stdbool.h>

#include "ring.h"
#include "built_in.h"

/* Give userland 10 us time to push packets to the ring */
#define TX_KERNEL_PULL_INT	10

/*
 * TX ring management API. Only the prototypes are visible in this header;
 * the definitions live in another translation unit.
 */

/* Ring geometry and creation (NOTE(review): grouping inferred from names). */
void setup_tx_ring_layout(int sock, struct ring *ring, unsigned int size,
			  bool jumbo_support);
void create_tx_ring(int sock, struct ring *ring, int verbose);

/* Mapping, frame bookkeeping and interface binding. */
void mmap_tx_ring(int sock, struct ring *ring);
void alloc_tx_ring_frames(int sock, struct ring *ring);
void bind_tx_ring(int sock, struct ring *ring, int ifindex);

/* Socket option helper and teardown. */
void set_packet_loss_discard(int sock);
void destroy_tx_ring(int sock, struct ring *ring);

/*
 * Check whether a TX frame carries no pending-transmission state.
 *
 * Returns 1 when neither TP_STATUS_SEND_REQUEST nor TP_STATUS_SENDING is set
 * in hdr->tp_status, and 0 if at least one of them is set.
 * NOTE(review): presumably a result of 1 means userland may fill the frame
 * again -- confirm against the callers.
 */
static inline int user_may_pull_from_tx(struct tpacket2_hdr *hdr)
{
	const unsigned int busy_bits = TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING;

	return (hdr->tp_status & busy_bits) == 0;
}

/*
 * Flag a TX frame as requested for sending.
 *
 * Overwrites hdr->tp_status with TP_STATUS_SEND_REQUEST; because this is a
 * plain store, any status bits that were set before are discarded.
 */
static inline void kernel_may_pull_from_tx(struct tpacket2_hdr *hdr)
{
	struct tpacket2_hdr *frame = hdr;

	frame->tp_status = TP_STATUS_SEND_REQUEST;
}

/*
 * Issue a zero-length sendto() on sock with MSG_DONTWAIT, i.e. without
 * blocking, and return its result narrowed to int.
 * NOTE(review): on a TX-ring packet socket this is presumably what makes the
 * kernel transmit the queued frames -- confirm against the ring setup code.
 */
static inline int pull_and_flush_tx_ring(int sock)
{
	const int flags = MSG_DONTWAIT;

	return (int) sendto(sock, NULL, 0, flags, NULL, 0);
}

/*
 * Issue a zero-length sendto() on sock with no flags set (so MSG_DONTWAIT is
 * not requested) and return its result narrowed to int.
 * NOTE(review): presumably the blocking counterpart of
 * pull_and_flush_tx_ring() -- confirm against the callers.
 */
static inline int pull_and_flush_tx_ring_wait(int sock)
{
	const int flags = 0;

	return (int) sendto(sock, NULL, 0, flags, NULL, 0);
}

#endif /* TX_RING_H */
ry severe performance regression when a BDI uses BDI_CAP_STRICTLIMIT because balance_dirty_pages() and the writeback worker can now disagree on whether writeback should be initiated. For example, in a system having 1GB of RAM, a single spinning disk, and a "pass-through" FUSE filesystem mounted over the disk, application code mmapped a 128MB file on the disk and was randomly dirtying pages in that mapping. Because FUSE uses strictlimit and has a default max_ratio of only 1%, in balance_dirty_pages, thresh is ~200, bg_thresh is ~100, and the dirty_freerun_ceiling is the average of those, ~150. So, it pauses the dirtying processes when we have 151 dirty pages and wakes up a background writeback worker. But the worker tests the wrong threshold (200 instead of 100), so it does not initiate writeback and just returns. Thus, balance_dirty_pages keeps looping, sleeping and then waking up the worker who will do nothing. It remains stuck in this state until the few dirty pages that we have finally expire and we write them back for that reason. Then the whole process repeats, resulting in near-zero throughput through the FUSE BDI. The fix is to call the parameterized variant of wb_calc_thresh, so that the worker will do writeback if the bg_thresh is exceeded which was the behavior before the referenced commit. Fixes: 947e9762a8dd ("writeback: update wb_over_bg_thresh() to use wb_domain aware operations") Signed-off-by: Howard Cochran <hcochran@kernelspring.com> Acked-by: Tejun Heo <tj@kernel.org> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com> Cc: <stable@vger.kernel.org> # v4.2+ Tested-by: Sedat Dilek <sedat.dilek@gmail.com> Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'Documentation')