summaryrefslogtreecommitdiff
path: root/net/netfilter/nf_conntrack_acct.c
blob: 45da11afa785b2779857b3786881158c821263b3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
/* Accouting handling for netfilter. */

/*
 * (C) 2008 Krzysztof Piotr Oledzki <ole@ans.pl>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/netfilter.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/moduleparam.h>
#include <linux/export.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_acct.h>

static bool nf_ct_acct __read_mostly;

module_param_named(acct, nf_ct_acct, bool, 0644);
MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting.");

#ifdef CONFIG_SYSCTL
static struct ctl_table acct_sysctl_table[] = {
	{
		.procname	= "nf_conntrack_acct",
		.data		= &init_net.ct.sysctl_acct,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{}
};
#endif /* CONFIG_SYSCTL */

unsigned int
seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
{
	struct nf_conn_acct *acct;
	struct nf_conn_counter *counter;

	acct = nf_conn_acct_find(ct);
	if (!acct)
		return 0;

	counter = acct->counter;
	seq_printf(s, "packets=%llu bytes=%llu ",
		   (unsigned long long)atomic64_read(&counter[dir].packets),
		   (unsigned long long)atomic64_read(&counter[dir].bytes));

	return 0;
};
EXPORT_SYMBOL_GPL(seq_print_acct);

static struct nf_ct_ext_type acct_extend __read_mostly = {
	.len	= sizeof(struct nf_conn_acct),
	.align	= __alignof__(struct nf_conn_acct),
	.id	= NF_CT_EXT_ACCT,
};

#ifdef CONFIG_SYSCTL
static int nf_conntrack_acct_init_sysctl(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(acct_sysctl_table, sizeof(acct_sysctl_table),
			GFP_KERNEL);
	if (!table)
		goto out;

	table[0].data = &net->ct.sysctl_acct;

	/* Don't export sysctls to unprivileged users */
	if (net->user_ns != &init_user_ns)
		table[0].procname = NULL;

	net->ct.acct_sysctl_header = register_net_sysctl(net, "net/netfilter",
							 table);
	if (!net->ct.acct_sysctl_header) {
		printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n");
		goto out_register;
	}
	return 0;

out_register:
	kfree(table);
out:
	return -ENOMEM;
}

static void nf_conntrack_acct_fini_sysctl(struct net *net)
{
	struct ctl_table *table;

	table = net->ct.acct_sysctl_header->ctl_table_arg;
	unregister_net_sysctl_table(net->ct.acct_sysctl_header);
	kfree(table);
}
#else
static int nf_conntrack_acct_init_sysctl(struct net *net)
{
	return 0;
}

static void nf_conntrack_acct_fini_sysctl(struct net *net)
{
}
#endif

int nf_conntrack_acct_pernet_init(struct net *net)
{
	net->ct.sysctl_acct = nf_ct_acct;
	return nf_conntrack_acct_init_sysctl(net);
}

void nf_conntrack_acct_pernet_fini(struct net *net)
{
	nf_conntrack_acct_fini_sysctl(net);
}

int nf_conntrack_acct_init(void)
{
	int ret = nf_ct_extend_register(&acct_extend);
	if (ret < 0)
		pr_err("nf_conntrack_acct: Unable to register extension\n");
	return ret;
}

void nf_conntrack_acct_fini(void)
{
	nf_ct_extend_unregister(&acct_extend);
}
'logsubject'>net: Fix checkpatch WARNING: please, no space before tabstcharding1-71/+71 This patch fixes multiple occurrences of space before tabs warnings. More lines of code were moved than required to keep kernel-doc comments uniform. Signed-off-by: Tobin C. Harding <me@tobin.cc> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-10net/act_pedit: Introduce 'add' operationAmir Vadai1-4/+26 This command could be useful to inc/dec fields. For example, to forward any TCP packet and decrease its TTL: $ tc filter add dev enp0s9 protocol ip parent ffff: \ flower ip_proto tcp \ action pedit munge ip ttl add 0xff pipe \ action mirred egress redirect dev veth0 In the example above, adding 0xff to this u8 field is actually decreasing it by one, since the operation is masked. Signed-off-by: Amir Vadai <amir@vadai.me> Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-10net/act_pedit: Support using offset relative to the conventional network headersAmir Vadai1-16/+180 Extend pedit to enable the user setting offset relative to network headers. This change would enable to work with more complex header schemes (vs the simple IPv4 case) where setting a fixed offset relative to the network header is not enough. After this patch, the action has information about the exact header type and field inside this header. This information could be used later on for hardware offloading of pedit. Backward compatibility was being kept: 1. Old kernel <-> new userspace 2. New kernel <-> old userspace 3. add rule using new userspace <-> dump using old userspace 4. add rule using old userspace <-> dump using new userspace When using the extended api, new netlink attributes are being used. This way, operation will fail in (1) and (3) - and no malformed rule be added or dumped. Of course, new user space that doesn't need the new functionality can use the old netlink attributes and operation will succeed. Since action can support both api's, (2) should work, and it is easy to write the new user space to have (4) work. The action is having a strict check that only header types and commands it can handle are accepted. This way future additions will be much easier. Usage example: $ tc filter add dev enp0s9 protocol ip parent ffff: \ flower \ ip_proto tcp \ dst_port 80 \ action pedit munge tcp dport set 8080 pipe \ action mirred egress redirect dev veth0 Will forward tcp port whose original dest port is 80, while modifying the destination port to 8080. Signed-off-by: Amir Vadai <amir@vadai.me> Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-10switchdev: bridge: Offload mc router portsNogah Frankel1-0/+15 Offload the mc router ports list, whenever it is being changed. It is done because in some cases mc packets needs to be flooded to all the ports in this list. Signed-off-by: Nogah Frankel <nogahf@mellanox.com> Signed-off-by: Yotam Gigi <yotamg@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> Acked-by: Ivan Vecera <ivecera@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-10bridge: mcast: Merge the mc router ports deletions to one functionNogah Frankel1-15/+9 There are three places where a port gets deleted from the mc router port list. This patch join the actual deletion to one function. It will be helpful for later patch that will offload changes in the mc router ports list. Signed-off-by: Nogah Frankel <nogahf@mellanox.com> Signed-off-by: Yotam Gigi <yotamg@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> Acked-by: Ivan Vecera <ivecera@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-10switchdev: bridge: Offload multicast disabledNogah Frankel1-0/+16 Offload multicast disabled flag, for more accurate mc flood behavior: When it is on, the mdb should be ignored. When it is off, unregistered mc packets should be flooded to mc router ports. Signed-off-by: Nogah Frankel <nogahf@mellanox.com> Signed-off-by: Yotam Gigi <yotamg@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> Acked-by: Ivan Vecera <ivecera@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-10sched: check negative err value to safe one level of indentJiri Pirko1-13/+9 As it is more common, check err for !0. That allows to safe one level of indentation and makes the code easier to read. Also, make 'next' variable global in function as it is used twice. Signed-off-by: Jiri Pirko <jiri@mellanox.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-10sched: add missing curly braces in else branch in tc_ctl_tfilterJiri Pirko1-1/+2 Curly braces need to be there, for stylistic reasons. Signed-off-by: Jiri Pirko <jiri@mellanox.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-10sched: move err set right before goto errout in tc_ctl_tfilterJiri Pirko1-10/+19 This makes the reader to know right away what is the error value. Signed-off-by: Jiri Pirko <jiri@mellanox.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-10sched: push TC filter protocol creation into a separate functionJiri Pirko1-51/+59 Make the long function tc_ctl_tfilter a little bit shorter and easier to read. Also make the creation of filter proto symmetric to destruction. Signed-off-by: Jiri Pirko <jiri@mellanox.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-10sched: move tcf_proto_destroy and tcf_destroy_chain helpers into cls_apiJiri Pirko13-24/+32 Creation is done in this file, move destruction to be at the same place. Signed-off-by: Jiri Pirko <jiri@mellanox.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-10sched: rename tcf_destroy to tcf_destroy_protoJiri Pirko2-6/+6 This function destroys TC filter protocol, not TC filter. So name it accordingly. Signed-off-by: Jiri Pirko <jiri@mellanox.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-10ipv4: fib: Add events for FIB replace and appendIdo Schimmel1-13/+14 The FIB notification chain currently uses the NLM_F_{REPLACE,APPEND} flags to signal routes being replaced or appended. Instead of using netlink flags for in-kernel notifications we can simply introduce two new events in the FIB notification chain. This has the added advantage of making the API cleaner, thereby making it clear that these events should be supported by listeners of the notification chain. Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> CC: Patrick McHardy <kaber@trash.net> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-10ipv4: fib: Send notification before deleting FIB aliasIdo Schimmel1-7/+7 When a FIB alias is replaced following NLM_F_REPLACE, the ENTRY_ADD notification is sent after the reference on the previous FIB info was dropped. This is problematic as potential listeners might need to access it in their notification blocks. Solve this by sending the notification prior to the deletion of the replaced FIB alias. This is consistent with ENTRY_DEL notifications. Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> CC: Patrick McHardy <kaber@trash.net> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-10ipv4: fib: Send deletion notification with actual FIB alias typeIdo Schimmel1-2/+2 When a FIB alias is removed, a notification is sent using the type passed from user space - can be RTN_UNSPEC - instead of the actual type of the removed alias. This is problematic for listeners of the FIB notification chain, as several FIB aliases can exist with matching parameters, but the type. Solve this by passing the actual type of the removed FIB alias. Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> CC: Patrick McHardy <kaber@trash.net> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-10ipv4: fib: Only flush FIB aliases belonging to currently flushed tableIdo Schimmel1-1/+2 In case the MAIN table is flushed and its trie is shared with the LOCAL table, then we might be flushing FIB aliases belonging to the latter. This can lead to FIB_ENTRY_DEL notifications sent with the wrong table ID. The above doesn't affect current listeners, as the table ID is ignored during entry deletion, but this will change later in the patchset. When flushing a particular table, skip any aliases belonging to a different one. Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> CC: Alexander Duyck <alexander.h.duyck@intel.com> CC: Patrick McHardy <kaber@trash.net> Reviewed-by: Alexander Duyck <alexander.h.duyck@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-09openvswitch: Pack struct sw_flow_key.Jarno Rajahalme4-34/+39 struct sw_flow_key has two 16-bit holes. Move the most matched conntrack match fields there. In some typical cases this reduces the size of the key that needs to be hashed into half and into one cache line. Signed-off-by: Jarno Rajahalme <jarno@ovn.org> Acked-by: Joe Stringer <joe@ovn.org> Acked-by: Pravin B Shelar <pshelar@ovn.org> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-09openvswitch: Add force commit.Jarno Rajahalme1-2/+24 Stateful network admission policy may allow connections to one direction and reject connections initiated in the other direction. After policy change it is possible that for a new connection an overlapping conntrack entry already exists, where the original direction of the existing connection is opposed to the new connection's initial packet. Most importantly, conntrack state relating to the current packet gets the "reply" designation based on whether the original direction tuple or the reply direction tuple matched. If this "directionality" is wrong w.r.t. to the stateful network admission policy it may happen that packets in neither direction are correctly admitted. This patch adds a new "force commit" option to the OVS conntrack action that checks the original direction of an existing conntrack entry. If that direction is opposed to the current packet, the existing conntrack entry is deleted and a new one is subsequently created in the correct direction. Signed-off-by: Jarno Rajahalme <jarno@ovn.org> Acked-by: Pravin B Shelar <pshelar@ovn.org> Acked-by: Joe Stringer <joe@ovn.org> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-09openvswitch: Add original direction conntrack tuple to sw_flow_key.Jarno Rajahalme