diff options
| author | Daniel Borkmann <dborkman@redhat.com> | 2013-05-31 15:07:15 +0200 | 
|---|---|---|
| committer | Daniel Borkmann <dborkman@redhat.com> | 2013-05-31 15:07:15 +0200 | 
| commit | d8cdc6ab87550de9c93b1f6763ea6015f292d7fb (patch) | |
| tree | e45481799a4436a4c5cb091151e2475e4da7df5c | |
| parent | cacb34f95fb7937e81b1fe8add7b5dca91e3a4c5 (diff) | |
ring: netsniff-ng: migrate capture only to TPACKET_V3
Let's migrate capturing to TPACKET_V3, since it will bring better
performance due to fewer page cache misses caused by a higher density
of packets, since they are now placed contiguously in the ring buffer.
It is said that TPACKET_V3 brings the following benefits:
 *) ~15 - 20% reduction in CPU-usage
 *) ~20% increase in packet capture rate
 *) ~2x increase in packet density
 *) Port aggregation analysis
 *) Non static frame size to capture entire packet payload
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
| -rw-r--r-- | dissector.h | 29 | ||||
| -rw-r--r-- | netsniff-ng.c | 154 | ||||
| -rw-r--r-- | pcap_io.h | 101 | ||||
| -rw-r--r-- | ring.h | 7 | ||||
| -rw-r--r-- | ring_rx.c | 2 | ||||
| -rw-r--r-- | xutils.c | 9 | ||||
| -rw-r--r-- | xutils.h | 2 | 
7 files changed, 176 insertions, 128 deletions
| diff --git a/dissector.h b/dissector.h index d211e06..2c2c128 100644 --- a/dissector.h +++ b/dissector.h @@ -9,6 +9,7 @@  #include <stdlib.h>  #include <stdint.h> +#include <linux/if_packet.h>  #include "ring.h"  #include "tprintf.h" @@ -45,31 +46,41 @@ static inline const char *__show_ts_source(uint32_t status)  		return "";  } -static inline void show_frame_hdr(struct frame_map *hdr, int mode) +static inline void __show_frame_hdr(struct sockaddr_ll *s_ll, +				    void *raw, int mode, bool v3)  {  	char tmp[IFNAMSIZ]; +	union tpacket_uhdr hdr;  	if (mode == PRINT_NONE)  		return; +	hdr.raw = raw; +  	switch (mode) {  	case PRINT_LESS:  		tprintf("%s %s %u", -			packet_types[hdr->s_ll.sll_pkttype] ? : "?", -			if_indextoname(hdr->s_ll.sll_ifindex, tmp) ? : "?", -			hdr->tp_h.tp_len); +			packet_types[s_ll->sll_pkttype] ? : "?", +			if_indextoname(s_ll->sll_ifindex, tmp) ? : "?", +			v3 ? hdr.h3->tp_len : hdr.h2->tp_len);  		break;  	default:  		tprintf("%s %s %u %us.%uns %s\n", -			packet_types[hdr->s_ll.sll_pkttype] ? : "?", -			if_indextoname(hdr->s_ll.sll_ifindex, tmp) ? : "?", -			hdr->tp_h.tp_len, hdr->tp_h.tp_sec, -			hdr->tp_h.tp_nsec, -			__show_ts_source(hdr->tp_h.tp_status)); +			packet_types[s_ll->sll_pkttype] ? : "?", +			if_indextoname(s_ll->sll_ifindex, tmp) ? : "?", +			v3 ? hdr.h3->tp_len : hdr.h2->tp_len, +			v3 ? hdr.h3->tp_sec : hdr.h2->tp_sec, +			v3 ? hdr.h3->tp_nsec : hdr.h2->tp_nsec, +			v3 ? 
"" : __show_ts_source(hdr.h2->tp_status));  		break;  	}  } +static inline void show_frame_hdr(struct frame_map *hdr, int mode) +{ +	__show_frame_hdr(&hdr->s_ll, &hdr->tp_h, mode, false); +} +  extern void dissector_init_all(int fnttype);  extern void dissector_entry_point(uint8_t *packet, size_t len, int linktype, int mode);  extern void dissector_cleanup_all(void); diff --git a/netsniff-ng.c b/netsniff-ng.c index e5e91ed..4211386 100644 --- a/netsniff-ng.c +++ b/netsniff-ng.c @@ -475,7 +475,7 @@ static void receive_to_xmit(struct ctx *ctx)  	timer_purge(); -	sock_print_net_stats(rx_sock, 0); +	sock_print_net_stats(rx_sock);  	bpf_release(&bpf_ops); @@ -788,10 +788,9 @@ static int begin_single_pcap_file(struct ctx *ctx)  	return fd;  } -static void print_pcap_file_stats(int sock, struct ctx *ctx, unsigned long skipped) +static void print_pcap_file_stats(int sock, struct ctx *ctx)  {  	int ret; -	unsigned long good, bad;  	struct tpacket_stats kstats;  	socklen_t slen = sizeof(kstats); @@ -802,27 +801,89 @@ static void print_pcap_file_stats(int sock, struct ctx *ctx, unsigned long skipp  		panic("Cannot get packet statistics!\n");  	if (ctx->print_mode == PRINT_NONE) { -		good = kstats.tp_packets - kstats.tp_drops - skipped; -		bad = kstats.tp_drops + skipped; - -		printf(".(+%lu/-%lu)", good, bad); +		printf(".(+%u/-%u)", kstats.tp_packets - kstats.tp_drops, +		       kstats.tp_drops);  		fflush(stdout);  	}  } -static void recv_only_or_dump(struct ctx *ctx) +static void walk_t3_block(struct block_desc *pbd, struct ctx *ctx, +			  int sock, int fd)  {  	uint8_t *packet; +	int num_pkts = pbd->h1.num_pkts, i, ret; +	unsigned long frame_count = 0; +	struct tpacket3_hdr *hdr; +	pcap_pkthdr_t phdr; +	struct sockaddr_ll *sll; + +	hdr = (void *) ((uint8_t *) pbd + pbd->h1.offset_to_first_pkt); +	sll = (void *) ((uint8_t *) hdr + TPACKET_ALIGN(sizeof(*hdr))); + +	for (i = 0; i < num_pkts && likely(sigint == 0); ++i) { +		__label__ next; +		packet = ((uint8_t *) hdr + 
hdr->tp_mac); +		frame_count++; + +		if (ctx->packet_type != -1) +			if (ctx->packet_type != sll->sll_pkttype) +				goto next; + +		if (dump_to_pcap(ctx)) { +			tpacket3_hdr_to_pcap_pkthdr(hdr, sll, &phdr, ctx->magic); + +			ret = __pcap_io->write_pcap(fd, &phdr, ctx->magic, packet, +						    pcap_get_length(&phdr, ctx->magic)); +			if (unlikely(ret != pcap_get_total_length(&phdr, ctx->magic))) +				panic("Write error to pcap!\n"); +		} + +		__show_frame_hdr(sll, hdr, ctx->print_mode, true); + +		dissector_entry_point(packet, hdr->tp_snaplen, ctx->link_type, +				      ctx->print_mode); +		next: + +                hdr = (void *) ((uint8_t *) hdr + hdr->tp_next_offset); +		sll = (void *) ((uint8_t *) hdr + TPACKET_ALIGN(sizeof(*hdr))); + +		if (frame_count_max != 0) { +			if (frame_count >= frame_count_max) { +				sigint = 1; +				break; +			} +		} + +		if (dump_to_pcap(ctx)) { +			if (ctx->dump_mode == DUMP_INTERVAL_SIZE) { +				interval += hdr->tp_snaplen; +				if (interval > ctx->dump_interval) { +					next_dump = true; +					interval = 0; +				} +			} + +			if (next_dump) { +				fd = next_multi_pcap_file(ctx, fd); +				next_dump = false; + +				if (unlikely(ctx->verbose)) +					print_pcap_file_stats(sock, ctx); +			} +		} +	} +} + +static void recv_only_or_dump(struct ctx *ctx) +{  	short ifflags = 0;  	int sock, irq, ifindex, fd = 0, ret;  	unsigned int size, it = 0; -	unsigned long frame_count = 0, skipped = 0;  	struct ring rx_ring;  	struct pollfd rx_poll; -	struct frame_map *hdr;  	struct sock_fprog bpf_ops;  	struct timeval start, end, diff; -	pcap_pkthdr_t phdr; +	struct block_desc *pbd;  	sock = pf_socket(); @@ -851,7 +912,7 @@ static void recv_only_or_dump(struct ctx *ctx)  	set_sockopt_hwtimestamp(sock, ctx->device_in); -	setup_rx_ring_layout(sock, &rx_ring, size, ctx->jumbo, false); +	setup_rx_ring_layout(sock, &rx_ring, size, ctx->jumbo, true);  	create_rx_ring(sock, &rx_ring, ctx->verbose);  	mmap_rx_ring(sock, &rx_ring);  	
alloc_rx_ring_frames(sock, &rx_ring); @@ -903,72 +964,15 @@ static void recv_only_or_dump(struct ctx *ctx)  	bug_on(gettimeofday(&start, NULL));  	while (likely(sigint == 0)) { -		while (user_may_pull_from_rx(rx_ring.frames[it].iov_base)) { -			__label__ next; +		while (user_may_pull_from_rx_block((pbd = (void *) +				rx_ring.frames[it].iov_base))) { +			walk_t3_block(pbd, ctx, sock, fd); -			hdr = rx_ring.frames[it].iov_base; -			packet = ((uint8_t *) hdr) + hdr->tp_h.tp_mac; -			frame_count++; - -			if (ctx->packet_type != -1) -				if (ctx->packet_type != hdr->s_ll.sll_pkttype) -					goto next; - -			if (unlikely(ring_frame_size(&rx_ring) < hdr->tp_h.tp_snaplen)) { -				skipped++; -				goto next; -			} - -			if (dump_to_pcap(ctx)) { -				tpacket_hdr_to_pcap_pkthdr(&hdr->tp_h, &hdr->s_ll, &phdr, ctx->magic); - -				ret = __pcap_io->write_pcap(fd, &phdr, ctx->magic, packet, -							    pcap_get_length(&phdr, ctx->magic)); -				if (unlikely(ret != pcap_get_total_length(&phdr, ctx->magic))) -					panic("Write error to pcap!\n"); -			} - -			show_frame_hdr(hdr, ctx->print_mode); - -			dissector_entry_point(packet, hdr->tp_h.tp_snaplen, -					      ctx->link_type, ctx->print_mode); - -			if (frame_count_max != 0) { -				if (frame_count >= frame_count_max) { -					sigint = 1; -					break; -				} -			} - -			next: - -			kernel_may_pull_from_rx(&hdr->tp_h); - -			it++; -			if (it >= rx_ring.layout.tp_frame_nr) -				it = 0; +			kernel_may_pull_from_rx_block(pbd); +			it = (it + 1) % rx_ring.layout3.tp_block_nr;  			if (unlikely(sigint == 1))  				break; - -			if (dump_to_pcap(ctx)) { -				if (ctx->dump_mode == DUMP_INTERVAL_SIZE) { -					interval += hdr->tp_h.tp_snaplen; - -					if (interval > ctx->dump_interval) { -						next_dump = true; -						interval = 0; -					} -				} - -				if (next_dump) { -					fd = next_multi_pcap_file(ctx, fd); -					next_dump = false; - -					if (ctx->verbose) -						print_pcap_file_stats(sock, ctx, skipped); -				} -			}  		}  		
poll(&rx_poll, 1, -1); @@ -978,7 +982,7 @@ static void recv_only_or_dump(struct ctx *ctx)  	timersub(&end, &start, &diff);  	if (!(ctx->dump_dir && ctx->print_mode == PRINT_NONE)) { -		sock_print_net_stats(sock, skipped); +		sock_print_net_stats(sock);  		printf("\r%12lu  sec, %lu usec in total\n",  		       diff.tv_sec, diff.tv_usec); @@ -274,66 +274,67 @@ static inline u32 pcap_get_total_length(pcap_pkthdr_t *phdr, enum pcap_type type  	}  } -static inline void tpacket_hdr_to_pcap_pkthdr(struct tpacket2_hdr *thdr, -					      struct sockaddr_ll *sll, -					      pcap_pkthdr_t *phdr, -					      enum pcap_type type) +static inline void +__tpacket_hdr_to_pcap_pkthdr(uint32_t sec, uint32_t nsec, uint32_t snaplen, +			     uint32_t len, uint32_t status, +			     struct sockaddr_ll *sll, pcap_pkthdr_t *phdr, +			     enum pcap_type type)  {  	switch (type) {  	case DEFAULT: -		phdr->ppo.ts.tv_sec = thdr->tp_sec; -		phdr->ppo.ts.tv_usec = thdr->tp_nsec / 1000; -		phdr->ppo.caplen = thdr->tp_snaplen; -		phdr->ppo.len = thdr->tp_len; +		phdr->ppo.ts.tv_sec = sec; +		phdr->ppo.ts.tv_usec = nsec / 1000; +		phdr->ppo.caplen = snaplen; +		phdr->ppo.len = len;  		break;  	case DEFAULT_SWAPPED: -		phdr->ppo.ts.tv_sec = ___constant_swab32(thdr->tp_sec); -		phdr->ppo.ts.tv_usec = ___constant_swab32(thdr->tp_nsec / 1000); -		phdr->ppo.caplen = ___constant_swab32(thdr->tp_snaplen); -		phdr->ppo.len = ___constant_swab32(thdr->tp_len); +		phdr->ppo.ts.tv_sec = ___constant_swab32(sec); +		phdr->ppo.ts.tv_usec = ___constant_swab32(nsec / 1000); +		phdr->ppo.caplen = ___constant_swab32(snaplen); +		phdr->ppo.len = ___constant_swab32(len);  		break;  	case NSEC: -		phdr->ppn.ts.tv_sec = thdr->tp_sec; -		phdr->ppn.ts.tv_nsec = thdr->tp_nsec; -		phdr->ppn.caplen = thdr->tp_snaplen; -		phdr->ppn.len = thdr->tp_len; +		phdr->ppn.ts.tv_sec = sec; +		phdr->ppn.ts.tv_nsec = nsec; +		phdr->ppn.caplen = snaplen; +		phdr->ppn.len = len;  		break;  	case NSEC_SWAPPED: -		phdr->ppn.ts.tv_sec = 
___constant_swab32(thdr->tp_sec); -		phdr->ppn.ts.tv_nsec = ___constant_swab32(thdr->tp_nsec); -		phdr->ppn.caplen = ___constant_swab32(thdr->tp_snaplen); -		phdr->ppn.len = ___constant_swab32(thdr->tp_len); +		phdr->ppn.ts.tv_sec = ___constant_swab32(sec); +		phdr->ppn.ts.tv_nsec = ___constant_swab32(nsec); +		phdr->ppn.caplen = ___constant_swab32(snaplen); +		phdr->ppn.len = ___constant_swab32(len);  		break;  	case KUZNETZOV: -		phdr->ppk.ts.tv_sec = thdr->tp_sec; -		phdr->ppk.ts.tv_usec = thdr->tp_nsec / 1000; -		phdr->ppk.caplen = thdr->tp_snaplen; -		phdr->ppk.len = thdr->tp_len; +		phdr->ppk.ts.tv_sec = sec; +		phdr->ppk.ts.tv_usec = nsec / 1000; +		phdr->ppk.caplen = snaplen; +		phdr->ppk.len = len;  		phdr->ppk.ifindex = sll->sll_ifindex;  		phdr->ppk.protocol = sll->sll_protocol;  		phdr->ppk.pkttype = sll->sll_pkttype;  		break;  	case KUZNETZOV_SWAPPED: -		phdr->ppk.ts.tv_sec = ___constant_swab32(thdr->tp_sec); -		phdr->ppk.ts.tv_usec = ___constant_swab32(thdr->tp_nsec / 1000); -		phdr->ppk.caplen = ___constant_swab32(thdr->tp_snaplen); -		phdr->ppk.len = ___constant_swab32(thdr->tp_len); +		phdr->ppk.ts.tv_sec = ___constant_swab32(sec); +		phdr->ppk.ts.tv_usec = ___constant_swab32(nsec / 1000); +		phdr->ppk.caplen = ___constant_swab32(snaplen); +		phdr->ppk.len = ___constant_swab32(len);  		phdr->ppk.ifindex = ___constant_swab32(sll->sll_ifindex);  		phdr->ppk.protocol = ___constant_swab16(sll->sll_protocol);  		phdr->ppk.pkttype = sll->sll_pkttype;  		break;  	case BORKMANN: -		phdr->ppb.ts.tv_sec = thdr->tp_sec; -		phdr->ppb.ts.tv_nsec = thdr->tp_nsec; -		phdr->ppb.caplen = thdr->tp_snaplen; -		phdr->ppb.len = thdr->tp_len; -		phdr->ppb.tsource = tp_to_pcap_tsource(thdr->tp_status); +		phdr->ppb.ts.tv_sec = sec; +		phdr->ppb.ts.tv_nsec = nsec; +		phdr->ppb.caplen = snaplen; +		phdr->ppb.len = len; +		phdr->ppb.tsource = tp_to_pcap_tsource(status);  		phdr->ppb.ifindex = (u16) sll->sll_ifindex;  		phdr->ppb.protocol = sll->sll_protocol;  		
phdr->ppb.hatype = sll->sll_hatype; @@ -341,11 +342,11 @@ static inline void tpacket_hdr_to_pcap_pkthdr(struct tpacket2_hdr *thdr,  		break;  	case BORKMANN_SWAPPED: -		phdr->ppb.ts.tv_sec = ___constant_swab32(thdr->tp_sec); -		phdr->ppb.ts.tv_nsec = ___constant_swab32(thdr->tp_nsec); -		phdr->ppb.caplen = ___constant_swab32(thdr->tp_snaplen); -		phdr->ppb.len = ___constant_swab32(thdr->tp_len); -		phdr->ppb.tsource = ___constant_swab16(tp_to_pcap_tsource(thdr->tp_status)); +		phdr->ppb.ts.tv_sec = ___constant_swab32(sec); +		phdr->ppb.ts.tv_nsec = ___constant_swab32(nsec); +		phdr->ppb.caplen = ___constant_swab32(snaplen); +		phdr->ppb.len = ___constant_swab32(len); +		phdr->ppb.tsource = ___constant_swab16(tp_to_pcap_tsource(status));  		phdr->ppb.ifindex = ___constant_swab16((u16) sll->sll_ifindex);  		phdr->ppb.protocol = ___constant_swab16(sll->sll_protocol);  		phdr->ppb.hatype = sll->sll_hatype; @@ -357,6 +358,30 @@ static inline void tpacket_hdr_to_pcap_pkthdr(struct tpacket2_hdr *thdr,  	}  } +/* We need to do this crap here since member offsets are not interleaved, + * so hopfully the compiler does his job here. 
;-) + */ + +static inline void tpacket_hdr_to_pcap_pkthdr(struct tpacket2_hdr *thdr, +					      struct sockaddr_ll *sll, +					      pcap_pkthdr_t *phdr, +					      enum pcap_type type) +{ +	__tpacket_hdr_to_pcap_pkthdr(thdr->tp_sec, thdr->tp_nsec, +				     thdr->tp_snaplen, thdr->tp_len, +				     thdr->tp_status, sll, phdr, type); +} + +static inline void tpacket3_hdr_to_pcap_pkthdr(struct tpacket3_hdr *thdr, +					       struct sockaddr_ll *sll, +					       pcap_pkthdr_t *phdr, +					       enum pcap_type type) +{ +	__tpacket_hdr_to_pcap_pkthdr(thdr->tp_sec, thdr->tp_nsec, +				     thdr->tp_snaplen, thdr->tp_len, +				     0, sll, phdr, type); +} +  static inline void pcap_pkthdr_to_tpacket_hdr(pcap_pkthdr_t *phdr,  					      enum pcap_type type,  					      struct tpacket2_hdr *thdr, @@ -26,6 +26,13 @@  #include "built_in.h"  #include "die.h" +union tpacket_uhdr { +	struct tpacket_hdr  *h1; +	struct tpacket2_hdr *h2; +	struct tpacket3_hdr *h3; +	void *raw; +}; +  struct frame_map {  	struct tpacket2_hdr tp_h __aligned_tpacket;  	struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket2_hdr)); @@ -59,7 +59,7 @@ void setup_rx_ring_layout(int sock, struct ring *ring, unsigned int size,  			     sizeof(struct tpacket_req) !=  			     offsetof(struct tpacket_req3, tp_retire_blk_tov)); -		ring->layout3.tp_retire_blk_tov = 0; +		ring->layout3.tp_retire_blk_tov = 100; /* 0: let kernel decide */  		ring->layout3.tp_sizeof_priv = 0;  		ring->layout3.tp_feature_req_word = 0; @@ -716,7 +716,7 @@ int device_bind_irq_to_cpu(int irq, int cpu)  	return (ret > 0 ? 
0 : ret);  } -void sock_print_net_stats(int sock, unsigned long skipped) +void sock_print_net_stats(int sock)  {  	int ret;  	struct tpacket_stats kstats; @@ -730,10 +730,11 @@ void sock_print_net_stats(int sock, unsigned long skipped)  		uint64_t drops = kstats.tp_drops;  		printf("\r%12ld  packets incoming\n", packets); -		printf("\r%12ld  packets passed filter\n", packets - drops - skipped); -		printf("\r%12ld  packets failed filter (out of space)\n", drops + skipped); +		printf("\r%12ld  packets passed filter\n", packets - drops); +		printf("\r%12ld  packets failed filter (out of space)\n", drops);  		if (kstats.tp_packets > 0) -			printf("\r%12.4lf%\% packet droprate\n", (1.0 * drops / packets) * 100.0); +			printf("\r%12.4lf%\% packet droprate\n", +			       (1.0 * drops / packets) * 100.0);  	}  } @@ -39,7 +39,7 @@ extern int device_address(const char *ifname, int af, struct sockaddr_storage *s  extern int device_irq_number(const char *ifname);  extern int device_set_irq_affinity_list(int irq, unsigned long from, unsigned long to);  extern int device_bind_irq_to_cpu(int irq, int cpu); -extern void sock_print_net_stats(int sock, unsigned long skipped); +extern void sock_print_net_stats(int sock);  extern int device_ifindex(const char *ifname);  extern short device_get_flags(const char *ifname);  extern void device_set_flags(const char *ifname, const short flags); | 
