summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Borkmann <dborkman@redhat.com>2013-05-31 15:07:15 +0200
committerDaniel Borkmann <dborkman@redhat.com>2013-05-31 15:07:15 +0200
commitd8cdc6ab87550de9c93b1f6763ea6015f292d7fb (patch)
treee45481799a4436a4c5cb091151e2475e4da7df5c
parentcacb34f95fb7937e81b1fe8add7b5dca91e3a4c5 (diff)
ring: netsniff-ng: migrate capture only to TPACKET_V3
Let's migrate capturing to TPACKET_V3, since it will bring better performance due to fewer page cache misses caused by a higher density of packets, since now they are contiguously placed in the ring buffer. It is said that TPACKET_V3 brings the following benefits: *) ~15 - 20% reduction in CPU-usage *) ~20% increase in packet capture rate *) ~2x increase in packet density *) Port aggregation analysis *) Non static frame size to capture entire packet payload Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
-rw-r--r--dissector.h29
-rw-r--r--netsniff-ng.c154
-rw-r--r--pcap_io.h101
-rw-r--r--ring.h7
-rw-r--r--ring_rx.c2
-rw-r--r--xutils.c9
-rw-r--r--xutils.h2
7 files changed, 176 insertions, 128 deletions
diff --git a/dissector.h b/dissector.h
index d211e06..2c2c128 100644
--- a/dissector.h
+++ b/dissector.h
@@ -9,6 +9,7 @@
#include <stdlib.h>
#include <stdint.h>
+#include <linux/if_packet.h>
#include "ring.h"
#include "tprintf.h"
@@ -45,31 +46,41 @@ static inline const char *__show_ts_source(uint32_t status)
return "";
}
-static inline void show_frame_hdr(struct frame_map *hdr, int mode)
+static inline void __show_frame_hdr(struct sockaddr_ll *s_ll,
+ void *raw, int mode, bool v3)
{
char tmp[IFNAMSIZ];
+ union tpacket_uhdr hdr;
if (mode == PRINT_NONE)
return;
+ hdr.raw = raw;
+
switch (mode) {
case PRINT_LESS:
tprintf("%s %s %u",
- packet_types[hdr->s_ll.sll_pkttype] ? : "?",
- if_indextoname(hdr->s_ll.sll_ifindex, tmp) ? : "?",
- hdr->tp_h.tp_len);
+ packet_types[s_ll->sll_pkttype] ? : "?",
+ if_indextoname(s_ll->sll_ifindex, tmp) ? : "?",
+ v3 ? hdr.h3->tp_len : hdr.h2->tp_len);
break;
default:
tprintf("%s %s %u %us.%uns %s\n",
- packet_types[hdr->s_ll.sll_pkttype] ? : "?",
- if_indextoname(hdr->s_ll.sll_ifindex, tmp) ? : "?",
- hdr->tp_h.tp_len, hdr->tp_h.tp_sec,
- hdr->tp_h.tp_nsec,
- __show_ts_source(hdr->tp_h.tp_status));
+ packet_types[s_ll->sll_pkttype] ? : "?",
+ if_indextoname(s_ll->sll_ifindex, tmp) ? : "?",
+ v3 ? hdr.h3->tp_len : hdr.h2->tp_len,
+ v3 ? hdr.h3->tp_sec : hdr.h2->tp_sec,
+ v3 ? hdr.h3->tp_nsec : hdr.h2->tp_nsec,
+ v3 ? "" : __show_ts_source(hdr.h2->tp_status));
break;
}
}
+static inline void show_frame_hdr(struct frame_map *hdr, int mode)
+{
+ __show_frame_hdr(&hdr->s_ll, &hdr->tp_h, mode, false);
+}
+
extern void dissector_init_all(int fnttype);
extern void dissector_entry_point(uint8_t *packet, size_t len, int linktype, int mode);
extern void dissector_cleanup_all(void);
diff --git a/netsniff-ng.c b/netsniff-ng.c
index e5e91ed..4211386 100644
--- a/netsniff-ng.c
+++ b/netsniff-ng.c
@@ -475,7 +475,7 @@ static void receive_to_xmit(struct ctx *ctx)
timer_purge();
- sock_print_net_stats(rx_sock, 0);
+ sock_print_net_stats(rx_sock);
bpf_release(&bpf_ops);
@@ -788,10 +788,9 @@ static int begin_single_pcap_file(struct ctx *ctx)
return fd;
}
-static void print_pcap_file_stats(int sock, struct ctx *ctx, unsigned long skipped)
+static void print_pcap_file_stats(int sock, struct ctx *ctx)
{
int ret;
- unsigned long good, bad;
struct tpacket_stats kstats;
socklen_t slen = sizeof(kstats);
@@ -802,27 +801,89 @@ static void print_pcap_file_stats(int sock, struct ctx *ctx, unsigned long skipp
panic("Cannot get packet statistics!\n");
if (ctx->print_mode == PRINT_NONE) {
- good = kstats.tp_packets - kstats.tp_drops - skipped;
- bad = kstats.tp_drops + skipped;
-
- printf(".(+%lu/-%lu)", good, bad);
+ printf(".(+%u/-%u)", kstats.tp_packets - kstats.tp_drops,
+ kstats.tp_drops);
fflush(stdout);
}
}
-static void recv_only_or_dump(struct ctx *ctx)
+static void walk_t3_block(struct block_desc *pbd, struct ctx *ctx,
+ int sock, int fd)
{
uint8_t *packet;
+ int num_pkts = pbd->h1.num_pkts, i, ret;
+ unsigned long frame_count = 0;
+ struct tpacket3_hdr *hdr;
+ pcap_pkthdr_t phdr;
+ struct sockaddr_ll *sll;
+
+ hdr = (void *) ((uint8_t *) pbd + pbd->h1.offset_to_first_pkt);
+ sll = (void *) ((uint8_t *) hdr + TPACKET_ALIGN(sizeof(*hdr)));
+
+ for (i = 0; i < num_pkts && likely(sigint == 0); ++i) {
+ __label__ next;
+ packet = ((uint8_t *) hdr + hdr->tp_mac);
+ frame_count++;
+
+ if (ctx->packet_type != -1)
+ if (ctx->packet_type != sll->sll_pkttype)
+ goto next;
+
+ if (dump_to_pcap(ctx)) {
+ tpacket3_hdr_to_pcap_pkthdr(hdr, sll, &phdr, ctx->magic);
+
+ ret = __pcap_io->write_pcap(fd, &phdr, ctx->magic, packet,
+ pcap_get_length(&phdr, ctx->magic));
+ if (unlikely(ret != pcap_get_total_length(&phdr, ctx->magic)))
+ panic("Write error to pcap!\n");
+ }
+
+ __show_frame_hdr(sll, hdr, ctx->print_mode, true);
+
+ dissector_entry_point(packet, hdr->tp_snaplen, ctx->link_type,
+ ctx->print_mode);
+ next:
+
+ hdr = (void *) ((uint8_t *) hdr + hdr->tp_next_offset);
+ sll = (void *) ((uint8_t *) hdr + TPACKET_ALIGN(sizeof(*hdr)));
+
+ if (frame_count_max != 0) {
+ if (frame_count >= frame_count_max) {
+ sigint = 1;
+ break;
+ }
+ }
+
+ if (dump_to_pcap(ctx)) {
+ if (ctx->dump_mode == DUMP_INTERVAL_SIZE) {
+ interval += hdr->tp_snaplen;
+ if (interval > ctx->dump_interval) {
+ next_dump = true;
+ interval = 0;
+ }
+ }
+
+ if (next_dump) {
+ fd = next_multi_pcap_file(ctx, fd);
+ next_dump = false;
+
+ if (unlikely(ctx->verbose))
+ print_pcap_file_stats(sock, ctx);
+ }
+ }
+ }
+}
+
+static void recv_only_or_dump(struct ctx *ctx)
+{
short ifflags = 0;
int sock, irq, ifindex, fd = 0, ret;
unsigned int size, it = 0;
- unsigned long frame_count = 0, skipped = 0;
struct ring rx_ring;
struct pollfd rx_poll;
- struct frame_map *hdr;
struct sock_fprog bpf_ops;
struct timeval start, end, diff;
- pcap_pkthdr_t phdr;
+ struct block_desc *pbd;
sock = pf_socket();
@@ -851,7 +912,7 @@ static void recv_only_or_dump(struct ctx *ctx)
set_sockopt_hwtimestamp(sock, ctx->device_in);
- setup_rx_ring_layout(sock, &rx_ring, size, ctx->jumbo, false);
+ setup_rx_ring_layout(sock, &rx_ring, size, ctx->jumbo, true);
create_rx_ring(sock, &rx_ring, ctx->verbose);
mmap_rx_ring(sock, &rx_ring);
alloc_rx_ring_frames(sock, &rx_ring);
@@ -903,72 +964,15 @@ static void recv_only_or_dump(struct ctx *ctx)
bug_on(gettimeofday(&start, NULL));
while (likely(sigint == 0)) {
- while (user_may_pull_from_rx(rx_ring.frames[it].iov_base)) {
- __label__ next;
+ while (user_may_pull_from_rx_block((pbd = (void *)
+ rx_ring.frames[it].iov_base))) {
+ walk_t3_block(pbd, ctx, sock, fd);
- hdr = rx_ring.frames[it].iov_base;
- packet = ((uint8_t *) hdr) + hdr->tp_h.tp_mac;
- frame_count++;
-
- if (ctx->packet_type != -1)
- if (ctx->packet_type != hdr->s_ll.sll_pkttype)
- goto next;
-
- if (unlikely(ring_frame_size(&rx_ring) < hdr->tp_h.tp_snaplen)) {
- skipped++;
- goto next;
- }
-
- if (dump_to_pcap(ctx)) {
- tpacket_hdr_to_pcap_pkthdr(&hdr->tp_h, &hdr->s_ll, &phdr, ctx->magic);
-
- ret = __pcap_io->write_pcap(fd, &phdr, ctx->magic, packet,
- pcap_get_length(&phdr, ctx->magic));
- if (unlikely(ret != pcap_get_total_length(&phdr, ctx->magic)))
- panic("Write error to pcap!\n");
- }
-
- show_frame_hdr(hdr, ctx->print_mode);
-
- dissector_entry_point(packet, hdr->tp_h.tp_snaplen,
- ctx->link_type, ctx->print_mode);
-
- if (frame_count_max != 0) {
- if (frame_count >= frame_count_max) {
- sigint = 1;
- break;
- }
- }
-
- next:
-
- kernel_may_pull_from_rx(&hdr->tp_h);
-
- it++;
- if (it >= rx_ring.layout.tp_frame_nr)
- it = 0;
+ kernel_may_pull_from_rx_block(pbd);
+ it = (it + 1) % rx_ring.layout3.tp_block_nr;
if (unlikely(sigint == 1))
break;
-
- if (dump_to_pcap(ctx)) {
- if (ctx->dump_mode == DUMP_INTERVAL_SIZE) {
- interval += hdr->tp_h.tp_snaplen;
-
- if (interval > ctx->dump_interval) {
- next_dump = true;
- interval = 0;
- }
- }
-
- if (next_dump) {
- fd = next_multi_pcap_file(ctx, fd);
- next_dump = false;
-
- if (ctx->verbose)
- print_pcap_file_stats(sock, ctx, skipped);
- }
- }
}
poll(&rx_poll, 1, -1);
@@ -978,7 +982,7 @@ static void recv_only_or_dump(struct ctx *ctx)
timersub(&end, &start, &diff);
if (!(ctx->dump_dir && ctx->print_mode == PRINT_NONE)) {
- sock_print_net_stats(sock, skipped);
+ sock_print_net_stats(sock);
printf("\r%12lu sec, %lu usec in total\n",
diff.tv_sec, diff.tv_usec);
diff --git a/pcap_io.h b/pcap_io.h
index 64689af..98f16cf 100644
--- a/pcap_io.h
+++ b/pcap_io.h
@@ -274,66 +274,67 @@ static inline u32 pcap_get_total_length(pcap_pkthdr_t *phdr, enum pcap_type type
}
}
-static inline void tpacket_hdr_to_pcap_pkthdr(struct tpacket2_hdr *thdr,
- struct sockaddr_ll *sll,
- pcap_pkthdr_t *phdr,
- enum pcap_type type)
+static inline void
+__tpacket_hdr_to_pcap_pkthdr(uint32_t sec, uint32_t nsec, uint32_t snaplen,
+ uint32_t len, uint32_t status,
+ struct sockaddr_ll *sll, pcap_pkthdr_t *phdr,
+ enum pcap_type type)
{
switch (type) {
case DEFAULT:
- phdr->ppo.ts.tv_sec = thdr->tp_sec;
- phdr->ppo.ts.tv_usec = thdr->tp_nsec / 1000;
- phdr->ppo.caplen = thdr->tp_snaplen;
- phdr->ppo.len = thdr->tp_len;
+ phdr->ppo.ts.tv_sec = sec;
+ phdr->ppo.ts.tv_usec = nsec / 1000;
+ phdr->ppo.caplen = snaplen;
+ phdr->ppo.len = len;
break;
case DEFAULT_SWAPPED:
- phdr->ppo.ts.tv_sec = ___constant_swab32(thdr->tp_sec);
- phdr->ppo.ts.tv_usec = ___constant_swab32(thdr->tp_nsec / 1000);
- phdr->ppo.caplen = ___constant_swab32(thdr->tp_snaplen);
- phdr->ppo.len = ___constant_swab32(thdr->tp_len);
+ phdr->ppo.ts.tv_sec = ___constant_swab32(sec);
+ phdr->ppo.ts.tv_usec = ___constant_swab32(nsec / 1000);
+ phdr->ppo.caplen = ___constant_swab32(snaplen);
+ phdr->ppo.len = ___constant_swab32(len);
break;
case NSEC:
- phdr->ppn.ts.tv_sec = thdr->tp_sec;
- phdr->ppn.ts.tv_nsec = thdr->tp_nsec;
- phdr->ppn.caplen = thdr->tp_snaplen;
- phdr->ppn.len = thdr->tp_len;
+ phdr->ppn.ts.tv_sec = sec;
+ phdr->ppn.ts.tv_nsec = nsec;
+ phdr->ppn.caplen = snaplen;
+ phdr->ppn.len = len;
break;
case NSEC_SWAPPED:
- phdr->ppn.ts.tv_sec = ___constant_swab32(thdr->tp_sec);
- phdr->ppn.ts.tv_nsec = ___constant_swab32(thdr->tp_nsec);
- phdr->ppn.caplen = ___constant_swab32(thdr->tp_snaplen);
- phdr->ppn.len = ___constant_swab32(thdr->tp_len);
+ phdr->ppn.ts.tv_sec = ___constant_swab32(sec);
+ phdr->ppn.ts.tv_nsec = ___constant_swab32(nsec);
+ phdr->ppn.caplen = ___constant_swab32(snaplen);
+ phdr->ppn.len = ___constant_swab32(len);
break;
case KUZNETZOV:
- phdr->ppk.ts.tv_sec = thdr->tp_sec;
- phdr->ppk.ts.tv_usec = thdr->tp_nsec / 1000;
- phdr->ppk.caplen = thdr->tp_snaplen;
- phdr->ppk.len = thdr->tp_len;
+ phdr->ppk.ts.tv_sec = sec;
+ phdr->ppk.ts.tv_usec = nsec / 1000;
+ phdr->ppk.caplen = snaplen;
+ phdr->ppk.len = len;
phdr->ppk.ifindex = sll->sll_ifindex;
phdr->ppk.protocol = sll->sll_protocol;
phdr->ppk.pkttype = sll->sll_pkttype;
break;
case KUZNETZOV_SWAPPED:
- phdr->ppk.ts.tv_sec = ___constant_swab32(thdr->tp_sec);
- phdr->ppk.ts.tv_usec = ___constant_swab32(thdr->tp_nsec / 1000);
- phdr->ppk.caplen = ___constant_swab32(thdr->tp_snaplen);
- phdr->ppk.len = ___constant_swab32(thdr->tp_len);
+ phdr->ppk.ts.tv_sec = ___constant_swab32(sec);
+ phdr->ppk.ts.tv_usec = ___constant_swab32(nsec / 1000);
+ phdr->ppk.caplen = ___constant_swab32(snaplen);
+ phdr->ppk.len = ___constant_swab32(len);
phdr->ppk.ifindex = ___constant_swab32(sll->sll_ifindex);
phdr->ppk.protocol = ___constant_swab16(sll->sll_protocol);
phdr->ppk.pkttype = sll->sll_pkttype;
break;
case BORKMANN:
- phdr->ppb.ts.tv_sec = thdr->tp_sec;
- phdr->ppb.ts.tv_nsec = thdr->tp_nsec;
- phdr->ppb.caplen = thdr->tp_snaplen;
- phdr->ppb.len = thdr->tp_len;
- phdr->ppb.tsource = tp_to_pcap_tsource(thdr->tp_status);
+ phdr->ppb.ts.tv_sec = sec;
+ phdr->ppb.ts.tv_nsec = nsec;
+ phdr->ppb.caplen = snaplen;
+ phdr->ppb.len = len;
+ phdr->ppb.tsource = tp_to_pcap_tsource(status);
phdr->ppb.ifindex = (u16) sll->sll_ifindex;
phdr->ppb.protocol = sll->sll_protocol;
phdr->ppb.hatype = sll->sll_hatype;
@@ -341,11 +342,11 @@ static inline void tpacket_hdr_to_pcap_pkthdr(struct tpacket2_hdr *thdr,
break;
case BORKMANN_SWAPPED:
- phdr->ppb.ts.tv_sec = ___constant_swab32(thdr->tp_sec);
- phdr->ppb.ts.tv_nsec = ___constant_swab32(thdr->tp_nsec);
- phdr->ppb.caplen = ___constant_swab32(thdr->tp_snaplen);
- phdr->ppb.len = ___constant_swab32(thdr->tp_len);
- phdr->ppb.tsource = ___constant_swab16(tp_to_pcap_tsource(thdr->tp_status));
+ phdr->ppb.ts.tv_sec = ___constant_swab32(sec);
+ phdr->ppb.ts.tv_nsec = ___constant_swab32(nsec);
+ phdr->ppb.caplen = ___constant_swab32(snaplen);
+ phdr->ppb.len = ___constant_swab32(len);
+ phdr->ppb.tsource = ___constant_swab16(tp_to_pcap_tsource(status));
phdr->ppb.ifindex = ___constant_swab16((u16) sll->sll_ifindex);
phdr->ppb.protocol = ___constant_swab16(sll->sll_protocol);
phdr->ppb.hatype = sll->sll_hatype;
@@ -357,6 +358,30 @@ static inline void tpacket_hdr_to_pcap_pkthdr(struct tpacket2_hdr *thdr,
}
}
+/* We need to do this crap here since member offsets are not interleaved,
+ * so hopefully the compiler does its job here. ;-)
+ */
+
+static inline void tpacket_hdr_to_pcap_pkthdr(struct tpacket2_hdr *thdr,
+ struct sockaddr_ll *sll,
+ pcap_pkthdr_t *phdr,
+ enum pcap_type type)
+{
+ __tpacket_hdr_to_pcap_pkthdr(thdr->tp_sec, thdr->tp_nsec,
+ thdr->tp_snaplen, thdr->tp_len,
+ thdr->tp_status, sll, phdr, type);
+}
+
+static inline void tpacket3_hdr_to_pcap_pkthdr(struct tpacket3_hdr *thdr,
+ struct sockaddr_ll *sll,
+ pcap_pkthdr_t *phdr,
+ enum pcap_type type)
+{
+ __tpacket_hdr_to_pcap_pkthdr(thdr->tp_sec, thdr->tp_nsec,
+ thdr->tp_snaplen, thdr->tp_len,
+ 0, sll, phdr, type);
+}
+
static inline void pcap_pkthdr_to_tpacket_hdr(pcap_pkthdr_t *phdr,
enum pcap_type type,
struct tpacket2_hdr *thdr,
diff --git a/ring.h b/ring.h
index be04cf0..8bfe1eb 100644
--- a/ring.h
+++ b/ring.h
@@ -26,6 +26,13 @@
#include "built_in.h"
#include "die.h"
+union tpacket_uhdr {
+ struct tpacket_hdr *h1;
+ struct tpacket2_hdr *h2;
+ struct tpacket3_hdr *h3;
+ void *raw;
+};
+
struct frame_map {
struct tpacket2_hdr tp_h __aligned_tpacket;
struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket2_hdr));
diff --git a/ring_rx.c b/ring_rx.c
index 0d1f828..ae8ce0a 100644
--- a/ring_rx.c
+++ b/ring_rx.c
@@ -59,7 +59,7 @@ void setup_rx_ring_layout(int sock, struct ring *ring, unsigned int size,
sizeof(struct tpacket_req) !=
offsetof(struct tpacket_req3, tp_retire_blk_tov));
- ring->layout3.tp_retire_blk_tov = 0;
+ ring->layout3.tp_retire_blk_tov = 100; /* 0: let kernel decide */
ring->layout3.tp_sizeof_priv = 0;
ring->layout3.tp_feature_req_word = 0;
diff --git a/xutils.c b/xutils.c
index 60b598c..5421d7d 100644
--- a/xutils.c
+++ b/xutils.c
@@ -716,7 +716,7 @@ int device_bind_irq_to_cpu(int irq, int cpu)
return (ret > 0 ? 0 : ret);
}
-void sock_print_net_stats(int sock, unsigned long skipped)
+void sock_print_net_stats(int sock)
{
int ret;
struct tpacket_stats kstats;
@@ -730,10 +730,11 @@ void sock_print_net_stats(int sock, unsigned long skipped)
uint64_t drops = kstats.tp_drops;
printf("\r%12ld packets incoming\n", packets);
- printf("\r%12ld packets passed filter\n", packets - drops - skipped);
- printf("\r%12ld packets failed filter (out of space)\n", drops + skipped);
+ printf("\r%12ld packets passed filter\n", packets - drops);
+ printf("\r%12ld packets failed filter (out of space)\n", drops);
if (kstats.tp_packets > 0)
- printf("\r%12.4lf%\% packet droprate\n", (1.0 * drops / packets) * 100.0);
+ printf("\r%12.4lf%\% packet droprate\n",
+ (1.0 * drops / packets) * 100.0);
}
}
diff --git a/xutils.h b/xutils.h
index 38c8da4..6e72b5c 100644
--- a/xutils.h
+++ b/xutils.h
@@ -39,7 +39,7 @@ extern int device_address(const char *ifname, int af, struct sockaddr_storage *s
extern int device_irq_number(const char *ifname);
extern int device_set_irq_affinity_list(int irq, unsigned long from, unsigned long to);
extern int device_bind_irq_to_cpu(int irq, int cpu);
-extern void sock_print_net_stats(int sock, unsigned long skipped);
+extern void sock_print_net_stats(int sock);
extern int device_ifindex(const char *ifname);
extern short device_get_flags(const char *ifname);
extern void device_set_flags(const char *ifname, const short flags);