path: root/net/ipv4/tcp_hybla.c
blob: 0f7175c3338e062a4a6507aacfdebc89e97a1948
/*
 * TCP HYBLA
 *
 * TCP-Hybla congestion control algorithm, based on:
 *   C. Caini, R. Firrincieli, "TCP Hybla: a TCP enhancement
 *   for heterogeneous networks",
 *   International Journal of Satellite Communications and Networking,
 *   September 2004
 *    Daniele Lacamera
 *    root at danielinux.net
 */

#include <linux/module.h>
#include <net/tcp.h>

/* TCP Hybla structure. */
struct hybla {
	bool  hybla_en;
	u32   snd_cwnd_cents; /* Fractional (<1) part of cwnd increments, <<7 */
	u32   rho;	      /* Rho parameter, integer part  */
	u32   rho2;	      /* Rho * Rho, integer part */
	u32   rho_3ls;	      /* Rho parameter, <<3 */
	u32   rho2_7ls;	      /* Rho^2, <<7	*/
	u32   minrtt_us;      /* Minimum smoothed round trip time value seen */
};
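
/*
 * Fixed-point convention used throughout (derived from the fields above,
 * stated here for illustration): rho_3ls stores rho << 3, so rho = 2.5
 * would be stored as 20; snd_cwnd_cents counts 1/128ths of a segment,
 * so a value of 64 represents half of a pending cwnd increment.
 */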

/* Hybla reference round trip time (default= 1/40 sec = 25 ms), in ms */
static int rtt0 = 25;
module_param(rtt0, int, 0644);
MODULE_PARM_DESC(rtt0, "reference round trip time (ms)");
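
/*
 * Usage note (illustrative, not part of this file): with perms 0644 the
 * parameter is also writable at runtime via sysfs, e.g.
 *	echo 50 > /sys/module/tcp_hybla/parameters/rtt0
 * or it can be set at load time with: modprobe tcp_hybla rtt0=50
 */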

/* This is called to refresh values for hybla parameters */
static inline void hybla_recalc_param(struct sock *sk)
{
	struct hybla *ca = inet_csk_ca(sk);

	ca->rho_3ls = max_t(u32,
			    tcp_sk(sk)->srtt_us / (rtt0 * USEC_PER_MSEC),
			    8U);
	ca->rho = ca->rho_3ls >> 3;
	ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1;
	ca->rho2 = ca->rho2_7ls >> 7;
}
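
/*
 * Worked example (illustrative): tp->srtt_us stores the smoothed RTT in
 * usec left-shifted by 3, so dividing by rtt0 in usec yields rho << 3
 * directly.  With srtt = 200 ms (srtt_us = 1600000) and the default
 * rtt0 = 25 ms: rho_3ls = 1600000 / 25000 = 64, rho = 8, and
 * rho2_7ls = (64 * 64) << 1 = 8192 = (rho^2) << 7, i.e. rho2 = 64.
 * The extra << 1 is what lifts (rho << 3)^2 = rho^2 << 6 up to << 7.
 */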

static void hybla_init(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct hybla *ca = inet_csk_ca(sk);

	ca->rho = 0;
	ca->rho2 = 0;
	ca->rho_3ls = 0;
	ca->rho2_7ls = 0;
	ca->snd_cwnd_cents = 0;
	ca->hybla_en = true;
	tp->snd_cwnd = 2;
	tp->snd_cwnd_clamp = 65535;

	/* 1st Rho measurement based on initial srtt */
	hybla_recalc_param(sk);

	/* set minimum rtt as this is the 1st ever seen */
	ca->minrtt_us = tp->srtt_us;
	tp->snd_cwnd = ca->rho;
}

static void hybla_state(struct sock *sk, u8 ca_state)
{
	struct hybla *ca = inet_csk_ca(sk);

	ca->hybla_en = (ca_state == TCP_CA_Open);
}

static inline u32 hybla_fraction(u32 odds)
{
	static const u32 fractions[] = {
		128, 139, 152, 165, 181, 197, 215, 234,
	};

	return (odds < ARRAY_SIZE(fractions)) ? fractions[odds] : 128;
}
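
/*
 * The table above is 2^(i/8) in <<7 fixed point (illustrative check:
 * fractions[4] = 181 ~= sqrt(2) * 128), so hybla_fraction(rho_fractions)
 * returns 2^fract << 7 for the fractional part of rho.
 */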

/* TCP Hybla main routine.
 * This is the algorithm behavior:
 *     o Recalc Hybla parameters if min_rtt has changed
 *     o Give cwnd a new value based on the model proposed
 *     o Remember increments < 1
 */
static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct hybla *ca = inet_csk_ca(sk);
	u32 increment, odd, rho_fractions;
	int is_slowstart = 0;

	/*  Recalculate rho only if this srtt is the lowest */
	if (tp->srtt_us < ca->minrtt_us) {
		hybla_recalc_param(sk);
		ca->minrtt_us = tp->srtt_us;
	}

	if (!tcp_is_cwnd_limited(sk))
		return;

	if (!ca->hybla_en) {
		tcp_reno_cong_avoid(sk, ack, acked);
		return;
	}

	if (ca->rho == 0)
		hybla_recalc_param(sk);

	rho_fractions = ca->rho_3ls - (ca->rho << 3);

	if (tcp_in_slow_start(tp)) {
		/*
		 * slow start
		 *      INC = 2^RHO - 1
		 * This is done by splitting the rho parameter
		 * into 2 parts: an integer part and a fraction part.
		 * Increment<<7 is estimated by doing:
		 *	       [2^(int+fract)]<<7
		 * which is equal to:
		 *	       (2^int)	*  [(2^fract) <<7]
		 * 2^int is computed directly as 1<<int,
		 * while hybla_fraction() is used to look up
		 * 2^fract as a <<7 value.
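		 * E.g. (illustrative): rho = 3 and rho_fractions = 4
		 * (i.e. rho ~= 3.5) give
		 * increment = (1 << 3) * 181 - 128 = 1320, about 10.3
		 * segments per ACK in <<7 fixed point, matching
		 * 2^3.5 - 1 ~= 10.31.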
		 */
		is_slowstart = 1;
		increment = ((1 << min(ca->rho, 16U)) *
			hybla_fraction(rho_fractions)) - 128;
	} else {
		/*
		 * congestion avoidance
		 * INC = RHO^2 / W
		 * as long as increment is estimated as (rho^2 <<7)/window
		 * it already is <<7 and we can easily count its fractions.
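		 * E.g. (illustrative): rho = 3 gives rho2_7ls = 1152;
		 * with snd_cwnd = 20, increment = 1152 / 20 = 57 < 128,
		 * so cwnd itself is untouched on this ACK and only the
		 * fractional accumulator snd_cwnd_cents grows.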
		 */
		increment = ca->rho2_7ls / tp->snd_cwnd;
		if (increment < 128)
			tp->snd_cwnd_cnt++;
	}

	odd = increment % 128;
	tp->snd_cwnd += increment >> 7;
	ca->snd_cwnd_cents += odd;

	/* when the accumulated fraction reaches >= 128, increase cwnd by 1. */
	while (ca->snd_cwnd_cents >= 128) {
		tp->snd_cwnd++;
		ca->snd_cwnd_cents -= 128;
		tp->snd_cwnd_cnt = 0;
	}
	/* check when cwnd has not been incremented for a while */
	if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tp->snd_cwnd) {
		tp->snd_cwnd++;
		tp->snd_cwnd_cnt = 0;
	}
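	/*
	 * Illustrative: with rho = 1 (rho2_7ls = 128) the logic above
	 * reduces to (roughly) Reno: ~128/cwnd cents per ACK, i.e. one
	 * whole segment per cwnd worth of ACKs, while the snd_cwnd_cnt
	 * fallback handles the increment == 0 case exactly like Reno.
	 */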
	/* clamp down slowstart cwnd to ssthresh value. */
	if (is_slowstart)
		tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);

	tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
}

static struct tcp_congestion_ops tcp_hybla __read_mostly = {
	.init		= hybla_init,
	.ssthresh	= tcp_reno_ssthresh,
	.undo_cwnd	= tcp_reno_undo_cwnd,
	.cong_avoid	= hybla_cong_avoid,
	.set_state	= hybla_state,

	.owner		= THIS_MODULE,
	.name		= "hybla"
};

static int __init hybla_register(void)
{
	BUILD_BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE);
	return tcp_register_congestion_control(&tcp_hybla);
}

static void __exit hybla_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_hybla);
}

module_init(hybla_register);
module_exit(hybla_unregister);

MODULE_AUTHOR("Daniele Lacamera");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("TCP Hybla");
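
/*
 * Usage note (illustrative, not part of this file): once the module is
 * loaded, Hybla can be selected system-wide with
 *	sysctl -w net.ipv4.tcp_congestion_control=hybla
 * or per-socket from userspace with
 *	setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, "hybla", 5);
 */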