/*
 * netsniff-ng - the packet sniffing beast
 * Copyright 2011 - 2013 Daniel Borkmann <dborkma@tik.ee.ethz.ch>,
 * Swiss federal institute of technology (ETH Zurich)
 * Subject to the GPL, version 2.
 */

#include <stdio.h>
#include <string.h>
#include <getopt.h>
#include <ctype.h>
#include <stdbool.h>
#include <sched.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/fsuid.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <net/ethernet.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <linux/icmp.h>
#include <linux/if.h>
#include <arpa/inet.h>
#include <signal.h>
#include <stdint.h>
#include <stdlib.h>
#include <fcntl.h>
#include <time.h>
#include <poll.h>
#include <netdb.h>
#include <math.h>
#include <unistd.h>

#include "xmalloc.h"
#include "die.h"
#include "str.h"
#include "sig.h"
#include "sock.h"
#include "cpus.h"
#include "lockme.h"
#include "privs.h"
#include "proc.h"
#include "mac80211.h"
#include "ioops.h"
#include "irq.h"
#include "config.h"
#include "built_in.h"
#include "trafgen_conf.h"
#include "tprintf.h"
#include "timer.h"
#include "ring_tx.h"
#include "csum.h"
#include "trafgen_proto.h"

#ifndef timeval_to_timespec
/*
 * Convert the struct timeval pointed to by tv (seconds + microseconds) into
 * the struct timespec pointed to by ts (seconds + nanoseconds).
 *
 * The body is wrapped in do { } while (0) so that the expansion behaves as a
 * single statement and can be used in an unbraced if/else without breaking
 * the else branch. Both arguments are evaluated twice.
 */
#define timeval_to_timespec(tv, ts) do {      \
	(ts)->tv_sec = (tv)->tv_sec;          \
	(ts)->tv_nsec = (tv)->tv_usec * 1000; \
} while (0)
#endif

/* Unit in which the rate shaper accounts for transmitted traffic. */
enum shaper_type {
	SHAPER_NONE,	/* no rate limit configured */
	SHAPER_PKTS,	/* every packet sent counts as 1 */
	SHAPER_BYTES,	/* every packet sent counts with its length */
};

/* State of the slow-path traffic shaper (fixed gap or rate limit). */
struct shaper {
	enum shaper_type type;		/* accounting unit, SHAPER_NONE if no rate */
	unsigned long long sent;	/* units sent since the last shaper_init() */
	unsigned long long rate;	/* threshold of units compared against 'sent' */
	struct timeval start;		/* time stamp taken in shaper_init() */
	struct timeval end;		/* time stamp taken when 'rate' was reached */
	struct timespec delay;		/* sleep interval handed to nanosleep() */
};

/*
 * Run-time configuration collected from the command line in main() and
 * handed to every worker process.
 *
 *   rand          - pick the next packet randomly instead of round robin
 *   rfraw         - device is switched into rfmon mode for raw 802.11 frames
 *   jumbo_support - passed on to ring_tx_setup()
 *   verbose       - passed on to the packet compiler and ring/socket setup
 *   smoke_test    - probe rhost with ICMP after every transmitted packet
 *   enforce       - passed on to drop_privileges() (set by -u/-g)
 *   qdisc_path    - if false, set_sock_qdisc_bypass() is applied to the socket
 *   reserve_size  - requested TX ring size in bytes, passed to ring_size()
 *   num           - per-process packet budget (from -n, re-split per CPU in
 *                   xmit_packet_precheck())
 *   cpus          - number of worker processes to fork
 *   uid, gid      - identity passed to drop_privileges()
 *   device        - device the packets are sent on
 *   device_trans  - original device name kept while in rfmon mode
 *   rhost         - IPv4 address string of the smoke test target
 *   dest          - socket address of the smoke test target
 *   sh            - shaper state (gap / rate)
 *   packet_str    - packet configuration given directly on the command line
 */
struct ctx {
	bool rand, rfraw, jumbo_support, verbose, smoke_test, enforce, qdisc_path;
	size_t reserve_size;
	unsigned long num;
	unsigned int cpus;
	uid_t uid; gid_t gid;
	char *device, *device_trans, *rhost;
	struct sockaddr_in dest;
	struct shaper sh;
	char *packet_str;
};

/*
 * Per-worker record living in the memory mapping created by
 * setup_shared_var(); one slot per forked process, indexed by CPU number.
 */
struct cpu_stats {
	unsigned long tv_sec, tv_usec;			/* duration of the transmit loop */
	unsigned long long tx_packets, tx_bytes;	/* what the worker counted as sent */
	unsigned long long cf_packets, cf_bytes;	/* packets/bytes in the worker's configuration */
	unsigned long long cd_packets;			/* worker's share of the requested packet count */
	sig_atomic_t state;				/* bitmask of CPU_STATS_STATE_* flags */
};

/*
 * Termination flag: set to 1 by signal_handler() and polled by the transmit
 * and wait loops. It must be volatile so that the compiler re-reads it on
 * every loop iteration instead of caching the value observed before the
 * signal arrived.
 */
static volatile sig_atomic_t sigint = 0;

/* Packet table and its number of entries (non-static, shared with other units). */
struct packet *packets = NULL;
size_t plen = 0;

/* Dynamic-element table parallel to 'packets' and its number of entries. */
struct packet_dyn *packet_dyn = NULL;
size_t dlen = 0;

/*
 * Option tables for getopt_long(). Every 'val' used in long_options must
 * also appear in short_options, otherwise the short form printed by help()
 * is rejected; 'A' (--no-sock-mem) is listed for that reason.
 */
static const char *short_options = "d:c:n:t:vJhS:rk:i:o:VRs:P:eE:pu:g:CHQqAD:b:";
static const struct option long_options[] = {
	{"dev",			required_argument,	NULL, 'd'},
	{"out",			required_argument,	NULL, 'o'},
	{"in",			required_argument,	NULL, 'i'},
	{"conf",		required_argument,	NULL, 'c'},
	{"num",			required_argument,	NULL, 'n'},
	{"gap",			required_argument,	NULL, 't'},
	{"rate",		required_argument,	NULL, 'b'},
	{"cpus",		required_argument,	NULL, 'P'},
	{"ring-size",		required_argument,	NULL, 'S'},
	{"kernel-pull",		required_argument,	NULL, 'k'},
	{"smoke-test",		required_argument,	NULL, 's'},
	{"seed",		required_argument,	NULL, 'E'},
	{"user",		required_argument,	NULL, 'u'},
	{"group",		required_argument,	NULL, 'g'},
	{"prio-high",		no_argument,		NULL, 'H'},
	{"notouch-irq",		no_argument,		NULL, 'Q'},
	{"no-sock-mem", 	no_argument,		NULL, 'A'},
	{"qdisc-path",		no_argument,		NULL, 'q'},
	{"jumbo-support",	no_argument,		NULL, 'J'},
	{"no-cpu-stats",	no_argument,		NULL, 'C'},
	{"cpp",			no_argument,		NULL, 'p'},
	{"define",		required_argument,	NULL, 'D'},
	{"rfraw",		no_argument,		NULL, 'R'},
	{"rand",		no_argument,		NULL, 'r'},
	{"verbose",		no_argument,		NULL, 'V'},
	{"version",		no_argument,		NULL, 'v'},
	{"example",		no_argument,		NULL, 'e'},
	{"help",		no_argument,		NULL, 'h'},
	{NULL, 0, NULL, 0}
};

/* Contact and license notice printed at the end of help() and version(). */
static const char *copyright = "Please report bugs to <netsniff-ng@googlegroups.com>\n"
	"Copyright (C) 2011-2013 Daniel Borkmann <dborkma@tik.ee.ethz.ch>,\n"
	"Swiss federal institute of technology (ETH Zurich)\n"
	"License: GNU GPL version 2.0\n"
	"This is free software: you are free to change and redistribute it.\n"
	"There is NO WARRANTY, to the extent permitted by law.";

/* Packet socket of the current worker; opened and closed in main_loop(). */
static int sock;
/* Per-CPU statistics array in the mapping created by setup_shared_var(). */
static struct cpu_stats *stats;
/* Seed handed to srand(3); either given via -E or generated per worker. */
static unsigned int seed;

/* Bit flags stored in cpu_stats.state, used for synchronizing the workers. */
#define CPU_STATS_STATE_CFG	1	/* cf_packets/cf_bytes have been published */
#define CPU_STATS_STATE_CHK	2	/* cd_packets has been published */
#define CPU_STATS_STATE_RES	4	/* worker is done (results written or nothing to send) */

/*
 * Fallback definitions used when the included headers do not define
 * ICMP_FILTER. The filter is installed with setsockopt() in
 * xmit_smoke_setup().
 */
#ifndef ICMP_FILTER
# define ICMP_FILTER	1

struct icmp_filter {
	__u32	data;	/* bitmask indexed by ICMP type, as built in xmit_smoke_setup() */
};
#endif

/* Maximum number of ICMP echo probes sent per xmit_smoke_probe() call. */
#define SMOKE_N_PROBES	100

/* NOTE(review): not referenced in the visible part of this file; 14 presumably
 * corresponds to the Ethernet header length - confirm before relying on it.
 */
#define PKT_MIN_LEN 14

/*
 * Signal handler shared by all registered signals: SIGINT, SIGQUIT and
 * SIGTERM raise the termination flag, everything else (including SIGHUP)
 * is ignored.
 */
static void signal_handler(int number)
{
	if (number == SIGINT || number == SIGQUIT || number == SIGTERM)
		sigint = 1;
}

/*
 * Print the program banner, the full usage text and the copyright notice to
 * stdout, then leave the program through die(). Does not return.
 */
static void __noreturn help(void)
{
	printf("trafgen %s, multithreaded zero-copy network packet generator\n", VERSION_STRING);
	puts("http://www.netsniff-ng.org\n\n"
	     "Usage: trafgen [options] [packet]\n"
	     "Options:\n"
	     "  -i|-c|--in|--conf <cfg/->      Packet configuration file/stdin\n"
	     "  -o|-d|--out|--dev <netdev>     Networking device i.e., eth0\n"
	     "  -p|--cpp                       Run packet config through C preprocessor\n"
	     "  -D|--define                    Add macro/define for C preprocessor\n"
	     "  -J|--jumbo-support             Support 64KB super jumbo frames (def: 2048B)\n"
	     "  -R|--rfraw                     Inject raw 802.11 frames\n"
	     "  -s|--smoke-test <ipv4>         Probe if machine survived fuzz-tested packet\n"
	     "  -n|--num <uint>                Number of packets until exit (def: 0)\n"
	     "  -r|--rand                      Randomize packet selection (def: round robin)\n"
	     "  -P|--cpus <uint>               Specify number of forks(<= CPUs) (def: #CPUs)\n"
	     "  -t|--gap <time>                Set approx. interpacket gap (s/ms/us/ns, def: us)\n"
	     "  -b|--rate <rate>               Send traffic at specified rate (pps/B/kB/MB/GB/kbit/Mbit/Gbit/KiB/MiB/GiB)\n"
	     "  -S|--ring-size <size>          Manually set mmap size (KiB/MiB/GiB)\n"
	     "  -E|--seed <uint>               Manually set srand(3) seed\n"
	     "  -u|--user <userid>             Drop privileges and change to userid\n"
	     "  -g|--group <groupid>           Drop privileges and change to groupid\n"
	     "  -H|--prio-high                 Make this high priority process\n"
	     "  -A|--no-sock-mem               Don't tune core socket memory\n"
	     "  -Q|--notouch-irq               Do not touch IRQ CPU affinity of NIC\n"
	     "  -q|--qdisc-path                Enabled qdisc kernel path (default off since 3.14)\n"
	     "  -V|--verbose                   Be more verbose\n"
	     "  -C|--no-cpu-stats              Do not print CPU time statistics on exit\n"
	     "  -v|--version                   Show version and exit\n"
	     "  -e|--example                   Show built-in packet config example\n"
	     "  -h|--help                      Guess what?!\n\n"
	     "Examples:\n"
	     "  trafgen --dev eth0 --conf trafgen.cfg\n"
	     "  trafgen -e | trafgen -i - -o eth0 --cpp -n 1\n"
	     "  trafgen --dev eth0 --conf fuzzing.cfg --smoke-test 10.0.0.1\n"
	     "  trafgen --dev wlan0 --rfraw --conf beacon-test.txf -V --cpus 2\n"
	     "  trafgen --dev eth0 --conf frag_dos.cfg --rand --gap 1000us\n"
	     "  trafgen --dev eth0 --conf icmp.cfg --rand --num 1400000 -k1000\n"
	     "  trafgen --dev eth0 --conf tcp_syn.cfg -u `id -u bob` -g `id -g bob`\n"
	     "  trafgen --dev eth0 '{ fill(0xff, 6), 0x00, 0x02, 0xb3, rnd(3), c16(0x0800), fill(0xca, 64) }'\n\n"
	     "Arbitrary packet config examples (e.g. trafgen -e > trafgen.cfg):\n"
	     "  Run packet on  all CPUs:              { fill(0xff, 64) csum16(0, 64) }\n"
	     "  Run packet only on CPU1:    cpu(1):   { rnd(64), 0b11001100, 0xaa }\n"
	     "  Run packet only on CPU1-2:  cpu(1-2): { drnd(64),'a',csum16(1, 8),'b',42 }\n\n"
	     "Generate config files from existing pcap using netsniff-ng:\n"
	     "  netsniff-ng --in dump.pcap --out dump.cfg\n\n"
	     "Note:\n"
	     "  Smoke/fuzz test example: machine A, 10.0.0.2 (trafgen) is directly\n"
	     "  connected to machine B (test kernel), 10.0.0.1. If ICMP reply fails\n"
	     "  we assume the kernel crashed, thus we print the packet and quit.\n"
	     "  In case you find a ping-of-death, please mention trafgen in your\n"
	     "  commit message of the fix!\n\n"
	     "  For introducing bit errors, delays with random variation and more,\n"
	     "  make use of tc(8) with its different disciplines, i.e. netem.\n\n"
	     "  For generating different package distributions, you can use scripting\n"
	     "  to generate a trafgen config file with packet ratios as:\n\n"
	     "     IMIX             64:7,  570:4,  1518:1\n"
	     "     Tolly            64:55,  78:5,   576:17, 1518:23\n"
	     "     Cisco            64:7,  594:4,  1518:1\n"
	     "     RPR Trimodal     64:60, 512:20, 1518:20\n"
	     "     RPR Quadrimodal  64:50, 512:15, 1518:15, 9218:20\n");
	puts(copyright);
	die();
}

/*
 * Print the built-in sample packet configuration (an Ethernet/IPv4/TCP SYN
 * frame with dynamic random fields, meant to be run through the C
 * preprocessor) to stdout and leave through die(). Does not return.
 */
static void __noreturn example(void)
{
	const char *e =
	"/* Note: dynamic elements make trafgen slower! */\n"
	"#include <stddef.h>\n\n"
	"{\n"
	"  /* MAC Destination */\n"
	"  fill(0xff, ETH_ALEN),\n"
	"  /* MAC Source */\n"
	"  0x00, 0x02, 0xb3, drnd(3),\n"
	"  /* IPv4 Protocol */\n"
	"  c16(ETH_P_IP),\n"
	"  /* IPv4 Version, IHL, TOS */\n"
	"  0b01000101, 0,\n"
	"  /* IPv4 Total Len */\n"
	"  c16(59),\n"
	"  /* IPv4 Ident */\n"
	"  drnd(2),\n"
	"  /* IPv4 Flags, Frag Off */\n"
	"  0b01000000, 0,\n"
	"  /* IPv4 TTL */\n"
	"  64,\n"
	"  /* Proto TCP */\n"
	"  0x06,\n"
	"  /* IPv4 Checksum (IP header from, to) */\n"
	"  csumip(14, 33),\n"
	"  /* Source IP */\n"
	"  drnd(4),\n"
	"  /* Dest IP */\n"
	"  drnd(4),\n"
	"  /* TCP Source Port */\n"
	"  drnd(2),\n"
	"  /* TCP Dest Port */\n"
	"  c16(80),\n"
	"  /* TCP Sequence Number */\n"
	"  drnd(4),\n"
	"  /* TCP Ackn. Number */\n"
	"  c32(0),\n"
	"  /* TCP Header length + TCP SYN/ECN Flag */\n"
	"  c16((8 << 12) | TCP_FLAG_SYN | TCP_FLAG_ECE)\n"
	"  /* Window Size */\n"
	"  c16(16),\n"
	"  /* TCP Checksum (offset IP, offset TCP) */\n"
	"  csumtcp(14, 34),\n"
	"  /* TCP Options */\n"
	"  0x00, 0x00, 0x01, 0x01, 0x08, 0x0a, 0x06,\n"
	"  0x91, 0x68, 0x7d, 0x06, 0x91, 0x68, 0x6f,\n"
	"  /* Data blob */\n"
	"  \"gotcha!\",\n"
	"}";
	puts(e);
	die();
}

/*
 * Print the version banner, project URL and copyright notice to stdout and
 * leave through die(). Does not return.
 */
static void __noreturn version(void)
{
	const char *about =
		"multithreaded zero-copy network packet generator\n"
		"http://www.netsniff-ng.org\n";

	printf("trafgen %s, Git id: %s\n", VERSION_LONG, GITVERSION);
	printf("%s\n", about);
	printf("%s\n", copyright);
	die();
}

/*
 * Advance every counter element of packet 'id' by one step and write the
 * result into the packet payload at the counter's offset.
 *
 * NOTE(review): the TYPE_DEC modulus (min - max + 1) evaluates to zero when
 * max == min + 1, and the payload receives the min-relative value rather
 * than counter->val - confirm both are intended.
 */
static void apply_counter(int id)
{
	const size_t ncounters = packet_dyn[id].clen;
	size_t k;

	for (k = 0; k < ncounters; k++) {
		struct counter *counter = &packet_dyn[id].cnt[k];
		uint8_t step = counter->val - counter->min;

		if (counter->type == TYPE_INC)
			step = (step + counter->inc) % (counter->max - counter->min + 1);
		else if (counter->type == TYPE_DEC)
			step = (step - counter->inc) % (counter->min - counter->max + 1);
		else
			bug();

		counter->val = step + counter->min;
		packets[id].payload[counter->off] = step;
	}
}

/*
 * Overwrite every randomizer offset of packet 'id' with a fresh byte taken
 * from rand().
 */
static void apply_randomizer(int id)
{
	const size_t nrand = packet_dyn[id].rlen;
	size_t k;

	for (k = 0; k < nrand; k++) {
		uint8_t byte = (uint8_t) rand();
		struct randomizer *slot = &packet_dyn[id].rnd[k];

		packets[id].payload[slot->off] = byte;
	}
}

/*
 * Recompute all 16 bit checksum elements of packet 'id'.
 *
 * For each element the checksum field is zeroed first, 'to' is clamped to
 * the last payload byte, the checksum is computed according to 'which' and
 * finally stored back at the element's offset.
 */
static void apply_csum16(int id)
{
	const size_t nsums = packet_dyn[id].slen;
	size_t k;

	for (k = 0; k < nsums; k++) {
		uint16_t result = 0;
		struct csum16 *cs = &packet_dyn[id].csum[k];

		fmemset(&packets[id].payload[cs->off], 0, sizeof(result));
		if (unlikely((size_t) cs->to >= packets[id].len))
			cs->to = packets[id].len - 1;

		switch (cs->which) {
		case CSUM_IP:
			result = calc_csum(packets[id].payload + cs->from,
					   cs->to - cs->from + 1);
			break;
		case CSUM_UDP:
		case CSUM_TCP:
			/* IPv4 pseudo header based checksum, 'from' = IP, 'to' = L4 */
			result = p4_csum((void *) packets[id].payload + cs->from,
					 packets[id].payload + cs->to,
					 (packets[id].len - cs->to),
					 cs->which == CSUM_UDP ? IPPROTO_UDP : IPPROTO_TCP);
			break;
		case CSUM_UDP6:
		case CSUM_TCP6:
			/* IPv6 counterpart of the above */
			result = p6_csum((void *) packets[id].payload + cs->from,
					 packets[id].payload + cs->to,
					 (packets[id].len - cs->to),
					 cs->which == CSUM_UDP6 ? IPPROTO_UDP : IPPROTO_TCP);
			break;
		default:
			bug();
			break;
		}

		fmemcpy(&packets[id].payload[cs->off], &result, sizeof(result));
	}
}

/*
 * One-time pass over all packets: packets whose only dynamic elements are
 * checksums get them computed once up front, after which their checksum
 * list is dropped so they are not recomputed per transmission.
 */
static void preprocess_packets(void)
{
	size_t idx;

	for (idx = 0; idx < plen; idx++) {
		struct packet_dyn *dyn = &packet_dyn[idx];

		if (!packet_dyn_has_only_csums(dyn))
			continue;

		apply_csum16(idx);
		dyn->slen = 0;
		xfree(dyn->csum);
	}
}

/*
 * Create the statistics area shared between the parent and its forked
 * workers: a zero-filled temporary file of cpus * sizeof(struct cpu_stats)
 * bytes is mapped MAP_SHARED, then closed and unlinked so only the mapping
 * remains. Panics if the mapping cannot be established.
 *
 * Returns the zeroed array with one slot per CPU.
 */
static struct cpu_stats *setup_shared_var(unsigned int cpus)
{
	const size_t map_len = cpus * sizeof(struct cpu_stats);
	struct cpu_stats *shared;
	char path[256];
	char *zeros;
	int fd;

	slprintf(path, sizeof(path), ".tmp_mmap.XXXXXX");
	fd = mkostemp_or_die(path, O_RDWR | O_CREAT | O_TRUNC);

	/* Extend the backing file to the full mapping length. */
	zeros = xzmalloc(map_len);
	write_or_die(fd, zeros, map_len);
	xfree(zeros);

	shared = mmap(NULL, map_len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (shared == MAP_FAILED)
		panic("Cannot setup shared variable!\n");

	close(fd);
	unlink(path);

	memset(shared, 0, map_len);
	return shared;
}

/* Unmap the statistics area created by setup_shared_var() for 'cpus' slots. */
static void destroy_shared_var(void *buff, unsigned int cpus)
{
	const size_t map_len = cpus * sizeof(struct cpu_stats);

	munmap(buff, map_len);
}

/*
 * Print 'len' payload bytes to stdout as a trafgen packet snippet: hex
 * bytes in braces, 15 per line, and flush the stream.
 */
static void dump_trafgen_snippet(uint8_t *payload, size_t len)
{
	size_t pos = 0;

	printf("{");
	while (pos < len) {
		const char *lead = (pos % 15 == 0) ? "\n  " : "";

		printf("%s0x%02x, ", lead, payload[pos]);
		pos++;
	}
	printf("\n}\n");
	fflush(stdout);
}

/*
 * Prepare the smoke test: open a raw ICMP socket that only lets echo replies
 * through, set its TTL to 64 and fill ctx->dest from the dotted-quad address
 * in ctx->rhost. Panics on any failure.
 *
 * Returns the ICMP socket descriptor; the caller closes it.
 */
static int xmit_smoke_setup(struct ctx *ctx)
{
	int icmp_sock, ret, ttl = 64;
	struct icmp_filter filter;

	icmp_sock = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
	if (icmp_sock < 0)
		panic("Cannot get a ICMP socket: %s!\n", strerror(errno));

	/* A set bit filters the ICMP type out: block all but echo reply. */
	filter.data = ~(1 << ICMP_ECHOREPLY);

	ret = setsockopt(icmp_sock, SOL_RAW, ICMP_FILTER, &filter, sizeof(filter));
	if (ret < 0)
		panic("Cannot install filter!\n");

	ret = setsockopt(icmp_sock, SOL_IP, IP_TTL, &ttl, sizeof(ttl));
	if (ret < 0)
		panic("Cannot set TTL!\n");

	memset(&ctx->dest, 0, sizeof(ctx->dest));
	ctx->dest.sin_family = AF_INET;
	ctx->dest.sin_port = 0;

	/* inet_aton() returns nonzero on success and 0 for an invalid address;
	 * it never returns a negative value.
	 */
	ret = inet_aton(ctx->rhost, &ctx->dest.sin_addr);
	if (ret == 0)
		panic("Cannot resolve address!\n");

	return icmp_sock;
}

/*
 * Check whether the smoke test target still answers ICMP echo requests.
 *
 * Up to SMOKE_N_PROBES echo requests with random non-zero identifiers and
 * 56 bytes of random payload are sent to ctx->dest; after each one the
 * socket is polled for 50 ms. A reply from the target that carries any of
 * the identifiers issued so far counts as success.
 *
 * Returns 0 if the target replied, -1 if no probe was answered. Panics on
 * send, poll or receive errors.
 */
static int xmit_smoke_probe(int icmp_sock, struct ctx *ctx)
{
	int ret;
	unsigned int i, j;
	/* Identifiers are kept unsigned: they are compared against the
	 * unsigned echo.id field, and a negative short would never match
	 * after integer promotion.
	 */
	uint16_t ident, cnt = 1, idstore[SMOKE_N_PROBES];
	uint8_t outpack[512], *data;
	struct icmphdr *icmp;
	struct iphdr *ip;
	size_t len = sizeof(*icmp) + 56;
	struct sockaddr_in from;
	socklen_t from_len;
	struct pollfd fds = {
		.fd = icmp_sock,
		.events = POLLIN,
	};

	fmemset(idstore, 0, sizeof(idstore));
	for (j = 0; j < SMOKE_N_PROBES; j++) {
		while ((ident = htons((uint16_t) rand())) == 0)
			sleep(0);
		idstore[j] = ident;

		memset(outpack, 0, sizeof(outpack));
		icmp = (void *) outpack;
		icmp->type = ICMP_ECHO;
		icmp->un.echo.id = ident;
		icmp->un.echo.sequence = htons(cnt++);

		data = ((uint8_t *) outpack + sizeof(*icmp));
		for (i = 0; i < 56; ++i)
			data[i] = (uint8_t) rand();

		icmp->checksum = csum((unsigned short *) outpack,
				      len / sizeof(unsigned short));

		ret = sendto(icmp_sock, outpack, len, MSG_DONTWAIT,
			     (struct sockaddr *) &ctx->dest, sizeof(ctx->dest));
		if (unlikely(ret != (int) len))
			panic("Cannot send out probe: %s!\n", strerror(errno));

		ret = poll(&fds, 1, 50);
		if (ret < 0)
			panic("Poll failed!\n");

		if (fds.revents & POLLIN) {
			/* The address length is a value-result argument and
			 * has to hold the buffer size before every call.
			 */
			from_len = sizeof(from);
			ret = recvfrom(icmp_sock, outpack, sizeof(outpack), 0,
				       (struct sockaddr *) &from, &from_len);
			if (unlikely(ret <= 0))
				panic("Probe receive failed!\n");
			if (unlikely(from_len != sizeof(ctx->dest)))
				continue;
			if (unlikely(memcmp(&from, &ctx->dest, sizeof(ctx->dest))))
				continue;
			if (unlikely((size_t) ret < sizeof(*ip) + sizeof(*icmp)))
				continue;
			/* Raw socket: the reply starts with the IP header. */
			ip = (void *) outpack;
			if (unlikely(ip->ihl * 4 + sizeof(*icmp) > (size_t) ret))
				continue;
			icmp = (void *) outpack + ip->ihl * 4;
			for (i = 0; i < array_size(idstore); ++i) {
				if (unlikely(icmp->un.echo.id != idstore[i]))
					continue;
				return 0;
			}
		}
	}

	return -1;
}

/*
 * Tell whether any shaping is configured: either a positive fixed delay
 * or a rate limit of some type.
 */
static bool shaper_is_set(struct shaper *sh)
{
	const bool has_delay = (sh->delay.tv_sec | sh->delay.tv_nsec) > 0;

	return has_delay ? true : sh->type != SHAPER_NONE;
}

/*
 * Start a new rate measurement interval: clear the pending delay, take the
 * start time stamp and reset the sent counter. Does nothing when no rate
 * limit is configured, so a fixed gap set via shaper_set_delay() survives.
 */
static void shaper_init(struct shaper *sh)
{
	if (sh->type != SHAPER_NONE) {
		memset(&sh->delay, 0, sizeof(struct timespec));
		bug_on(gettimeofday(&sh->start, NULL));
		sh->sent = 0;
	}
}

/* Configure a fixed inter-packet delay of 'sec' seconds plus 'ns' nanoseconds. */
static void shaper_set_delay(struct shaper *sh, time_t sec, long int ns)
{
	struct timespec *gap = &sh->delay;

	gap->tv_sec = sec;
	gap->tv_nsec = ns;
}

/*
 * Reset the whole shaper (including any previously set delay) and configure
 * it to limit traffic to 'rate' units of 'type'.
 */
static void shaper_set_rate(struct shaper *sh, unsigned long long rate,
			    enum shaper_type type)
{
	memset(sh, 0, sizeof(*sh));

	sh->type = type;
	sh->rate = rate;
}

/*
 * Account for one transmitted packet of 'pkt_len' bytes and sleep if
 * shaping demands it.
 *
 * With a rate limit, once the units sent reach the configured rate and
 * less than one second has elapsed since shaper_init(), the remainder of
 * that second becomes the delay. Any positive delay (computed here or set
 * as a fixed gap) is slept via nanosleep(), followed by shaper_init().
 */
static void shaper_delay(struct shaper *sh, unsigned long pkt_len)
{
	if (sh->type == SHAPER_BYTES)
		sh->sent += pkt_len;
	else if (sh->type != SHAPER_NONE)
		sh->sent += 1;

	if (sh->sent >= sh->rate && sh->rate > 0) {
		struct timeval remaining;
		struct timeval elapsed;
		struct timeval one_sec = { .tv_sec = 1 };

		bug_on(gettimeofday(&sh->end, NULL));
		timersub(&sh->end, &sh->start, &elapsed);

		if (timercmp(&one_sec, &elapsed, > )) {
			timersub(&one_sec, &elapsed, &remaining);
			timeval_to_timespec(&remaining, &sh->delay);
		}
	}

	if ((sh->delay.tv_sec | sh->delay.tv_nsec) <= 0)
		return;

	nanosleep(&sh->delay, NULL);
	shaper_init(sh);
}

/*
 * Refresh all dynamic parts of packet 'idx' right before transmission:
 * counters, random bytes and checksums first, then dynamic protocol fields
 * followed by a protocol-level packet update.
 */
static inline void packet_apply_dyn_elements(int idx)
{
	uint32_t f;

	if (packet_dyn_has_elems(&packet_dyn[idx])) {
		apply_counter(idx);
		apply_randomizer(idx);
		apply_csum16(idx);
	}

	if (!packet_dyn_has_fields(&packet_dyn[idx]))
		return;

	for (f = 0; f < packet_dyn[idx].flen; f++)
		proto_field_dyn_apply(packet_dyn[idx].fields[f]);

	proto_packet_update(idx);
}

/*
 * Transmit loop using one sendto() per packet. Used for smoke testing and
 * whenever a gap or rate is configured.
 *
 * ctx      - run-time configuration; ctx->num is this worker's packet budget
 * cpu      - index of this worker's slot in the shared stats array
 * orig_num - packet count originally requested by the user (0 = unlimited);
 *            if the user asked for a count but this worker's share is 0,
 *            nothing is sent
 *
 * Runs until the budget is used up, a termination signal arrives or the
 * smoke test detects an unresponsive target. Results are published in
 * stats[cpu] and CPU_STATS_STATE_RES is set.
 */
static void xmit_slowpath_or_die(struct ctx *ctx, unsigned int cpu, unsigned long orig_num)
{
	int ret, icmp_sock = -1;
	unsigned long num = 1, i = 0;
	struct timeval start, end, diff;
	unsigned long long tx_bytes = 0, tx_packets = 0;
	struct sockaddr_ll saddr = {
		.sll_family = PF_PACKET,
		.sll_halen = ETH_ALEN,
		.sll_ifindex = device_ifindex(ctx->device),
	};

	if (ctx->num > 0)
		num = ctx->num;
	if (ctx->num == 0 && orig_num > 0)
		num = 0;

	if (ctx->smoke_test)
		icmp_sock = xmit_smoke_setup(ctx);

	drop_privileges(ctx->enforce, ctx->uid, ctx->gid);

	bug_on(gettimeofday(&start, NULL));

	if (shaper_is_set(&ctx->sh))
		shaper_init(&ctx->sh);

	while (likely(sigint == 0 && num > 0 && plen > 0)) {
		unsigned long sent_len;

		packet_apply_dyn_elements(i);
retry:
		ret = sendto(sock, packets[i].payload, packets[i].len, 0,
			     (struct sockaddr *) &saddr, sizeof(saddr));
		if (unlikely(ret < 0)) {
			if (errno == ENOBUFS) {
				sched_yield();
				goto retry;
			}
			/* Outside of smoke tests send errors are tolerated. */
			if (ctx->smoke_test)
				panic("Sendto error: %s!\n", strerror(errno));
		}

		/* Remember the length of the packet just sent; 'i' moves on to
		 * the next packet before the shaper is consulted.
		 */
		sent_len = packets[i].len;

		tx_bytes += sent_len;
		tx_packets++;

		if (ctx->smoke_test) {
			ret = xmit_smoke_probe(icmp_sock, ctx);
			if (unlikely(ret < 0)) {
				printf("%sSmoke test alert:%s\n", colorize_start(bold), colorize_end());
				printf("  Remote host seems to be unresponsive to ICMP probes!\n");
				printf("  Last instance was packet%lu, seed:%u, trafgen snippet:\n\n",
				       i, seed);

				dump_trafgen_snippet(packets[i].payload, packets[i].len);
				break;
			}
		}

		if (!ctx->rand) {
			i++;
			if (i >= plen)
				i = 0;
		} else
			i = rand() % plen;

		/* An unlimited run keeps num at 1 forever. */
		if (ctx->num > 0)
			num--;

		if (shaper_is_set(&ctx->sh))
			shaper_delay(&ctx->sh, sent_len);
	}

	bug_on(gettimeofday(&end, NULL));
	timersub(&end, &start, &diff);

	if (ctx->smoke_test)
		close(icmp_sock);

	stats[cpu].tx_packets = tx_packets;
	stats[cpu].tx_bytes = tx_bytes;
	stats[cpu].tv_sec = diff.tv_sec;
	stats[cpu].tv_usec = diff.tv_usec;

	stats[cpu].state |= CPU_STATS_STATE_RES;
}

/*
 * Transmit loop using the memory mapped TX ring set up by ring_tx_setup().
 *
 * ctx      - run-time configuration; ctx->num is this worker's packet budget
 * cpu      - index of this worker's slot in the shared stats array
 * orig_num - packet count originally requested by the user (0 = unlimited);
 *            if the user asked for a count but this worker's share is 0,
 *            nothing is sent
 *
 * Packets are copied into successive ring frames and handed to the kernel;
 * when the next frame is not available the ring is flushed and the frame
 * is retried. Results are published in stats[cpu] and CPU_STATS_STATE_RES
 * is set.
 */
static void xmit_fastpath_or_die(struct ctx *ctx, unsigned int cpu, unsigned long orig_num)
{
	int ifindex = device_ifindex(ctx->device);
	uint8_t *out = NULL;
	unsigned int it = 0;	/* index of the current ring frame */
	unsigned long num = 1, i = 0;	/* remaining budget, current packet index */
	size_t size = ring_size(ctx->device, ctx->reserve_size);
	struct ring tx_ring;
	struct frame_map *hdr;
	struct timeval start, end, diff;
	unsigned long long tx_bytes = 0, tx_packets = 0;

	set_sock_prio(sock, 512);

	ring_tx_setup(&tx_ring, sock, size, ifindex, ctx->jumbo_support, ctx->verbose);

	drop_privileges(ctx->enforce, ctx->uid, ctx->gid);

	if (ctx->num > 0)
		num = ctx->num;
	if (ctx->num == 0 && orig_num > 0)
		num = 0;

	bug_on(gettimeofday(&start, NULL));

	while (likely(sigint == 0 && num > 0 && plen > 0)) {
		/* Frame not usable yet (presumably still owned by the kernel):
		 * flush the ring and try the same frame again.
		 */
		if (!user_may_pull_from_tx(tx_ring.frames[it].iov_base)) {
			int ret = pull_and_flush_tx_ring(sock);
			if (unlikely(ret < 0)) {
				/* We could hit EBADF if the socket has been closed before
				 * the timer was triggered.
				 */
				if (errno != EBADF && errno != ENOBUFS)
					panic("Flushing TX_RING failed: %s!\n", strerror(errno));
			}

			continue;
		}

		hdr = tx_ring.frames[it].iov_base;
		/* Packet data is placed right behind the frame header. */
		out = ((uint8_t *) hdr) + TPACKET2_HDRLEN - sizeof(struct sockaddr_ll);

		hdr->tp_h.tp_snaplen = packets[i].len;
		hdr->tp_h.tp_len = packets[i].len;

		/* Update dynamic elements before the payload is copied out. */
		packet_apply_dyn_elements(i);

		fmemcpy(out, packets[i].payload, packets[i].len);

		tx_bytes += packets[i].len;
		tx_packets++;

		if (!ctx->rand) {
			i++;
			if (i >= plen)
				i = 0;
		} else
			i = rand() % plen;

		/* Hand the filled frame over for transmission. */
		kernel_may_pull_from_tx(&hdr->tp_h);

		it++;
		if (it >= tx_ring.layout.tp_frame_nr)
			it = 0;

		/* An unlimited run keeps num at 1 forever. */
		if (ctx->num > 0)
			num--;
	}

	bug_on(gettimeofday(&end, NULL));
	timersub(&end, &start, &diff);

	pull_and_flush_tx_ring_wait(sock);
	destroy_tx_ring(sock, &tx_ring);

	stats[cpu].tx_packets = tx_packets;
	stats[cpu].tx_bytes = tx_bytes;
	stats[cpu].tv_sec = diff.tv_sec;
	stats[cpu].tv_usec = diff.tv_usec;

	stats[cpu].state |= CPU_STATS_STATE_RES;
}

static inline void __set_state(unsigned int cpu, sig_atomic_t s)
{
	stats[cpu].state = s;
}

static inline sig_atomic_t __get_state(unsigned int cpu)
{
	return stats[cpu].state;
}

/*
 * Wait until every other worker has published its configuration (or has
 * already finished) and return the total number of configured packets
 * across all workers, including this one's plen. Waiting is cut short
 * once a termination signal was received.
 */
static unsigned long __wait_and_sum_others(struct ctx *ctx, unsigned int cpu)
{
	const sig_atomic_t ready = CPU_STATS_STATE_CFG | CPU_STATS_STATE_RES;
	unsigned long sum = plen;
	unsigned int other;

	for (other = 0; other < ctx->cpus; other++) {
		if (other != cpu) {
			while ((__get_state(other) & ready) == 0 && sigint == 0)
				sched_yield();

			sum += stats[other].cf_packets;
		}
	}

	return sum;
}

/*
 * Compensate for rounding in the per-worker packet shares.
 *
 * After every worker has published its share in cd_packets, the shares are
 * summed up and compared with the originally requested count 'orig'. The
 * difference is applied to exactly one worker: the first one (in CPU order)
 * that has a non-zero share which stays non-negative after the correction.
 * Each worker runs this independently and only adjusts its own ctx->num if
 * it is the selected one.
 */
static void __correct_global_delta(struct ctx *ctx, unsigned int cpu, unsigned long orig)
{
	unsigned int i;
	unsigned long total;
	int cpu_sel;
	long long delta_correction = 0;

	/* Start from our own share and add those of all other workers. */
	for (i = 0, total = ctx->num; i < ctx->cpus; i++) {
		if (i == cpu)
			continue;

		/* Wait until worker i has published its share or is done. */
		while ((__get_state(i) &
		       (CPU_STATS_STATE_CHK |
			CPU_STATS_STATE_RES)) == 0 &&
		       sigint == 0)
			sched_yield();

		total += stats[i].cd_packets;
	}

	/* Negative if too many packets were scheduled, positive if too few. */
	if (total > orig)
		delta_correction = -1 * ((long long) total - orig);
	if (total < orig)
		delta_correction = +1 * ((long long) orig - total);

	/* Pick the first worker able to absorb the correction. */
	for (cpu_sel = -1, i = 0; i < ctx->cpus; i++) {
		if (stats[i].cd_packets > 0) {
			if ((long long) stats[i].cd_packets +
			    delta_correction >= 0) {
				cpu_sel = i;
				break;
			}
		}
	}

	if ((int) cpu == cpu_sel)
		ctx->num += delta_correction;
}

static void __set_state_cf(unsigned int cpu, unsigned long p, unsigned long b,
			   sig_atomic_t s)
{
	stats[cpu].cf_packets = p;
	stats[cpu].cf_bytes = b;
	stats[cpu].state = s;
}

static void __set_state_cd(unsigned int cpu, unsigned long p, sig_atomic_t s)
{
	stats[cpu].cd_packets = p;
	stats[cpu].state = s;
}

/*
 * Publish this worker's configuration size and, if a packet count was
 * requested, split that count between the workers proportionally to the
 * number of packets each one has configured. Rounding differences are
 * evened out by __correct_global_delta(). A worker without any packets
 * marks itself as finished right away.
 */
static void xmit_packet_precheck(struct ctx *ctx, unsigned int cpu)
{
	const unsigned long requested = ctx->num;
	unsigned long all_pkts;
	size_t bytes = 0;
	unsigned int idx;

	bug_on(plen != dlen);

	for (idx = 0; idx < plen; idx++)
		bytes += packets[idx].len;

	__set_state_cf(cpu, plen, bytes, CPU_STATS_STATE_CFG);
	all_pkts = __wait_and_sum_others(ctx, cpu);

	if (requested > 0) {
		ctx->num = (unsigned long) round((1.0 * plen / all_pkts) * requested);

		__set_state_cd(cpu, ctx->num,
			       CPU_STATS_STATE_CHK | CPU_STATS_STATE_CFG);
		__correct_global_delta(ctx, cpu, requested);
	}

	if (plen == 0)
		__set_state(cpu, CPU_STATS_STATE_RES);
}

/*
 * Body of one worker process: compile the packet configuration (from the
 * command line string if present, otherwise from 'confname'), precompute
 * static checksums, negotiate the packet share with the other workers,
 * open the packet socket and run the slow or fast transmit path. Worker 0
 * additionally prints the scheduling summary.
 */
static void main_loop(struct ctx *ctx, char *confname, bool slow,
		      unsigned int cpu, bool invoke_cpp, char **cpp_argv,
		      unsigned long orig_num)
{
	if (!ctx->packet_str)
		compile_packets(confname, ctx->verbose, cpu, invoke_cpp, cpp_argv);
	else
		compile_packets_str(ctx->packet_str, ctx->verbose, cpu);

	preprocess_packets();

	xmit_packet_precheck(ctx, cpu);

	if (cpu == 0) {
		size_t sum_bytes = 0, sum_pkts = 0;
		unsigned int n;

		for (n = 0; n < ctx->cpus; n++) {
			sum_bytes += stats[n].cf_bytes;
			sum_pkts  += stats[n].cf_packets;
		}

		printf("%6zu packets to schedule\n", sum_pkts);
		printf("%6zu bytes in total\n", sum_bytes);
		printf("Running! Hang up with ^C!\n\n");
		fflush(stdout);
	}

	sock = pf_socket();

	if (!ctx->qdisc_path)
		set_sock_qdisc_bypass(sock, ctx->verbose);

	if (!slow)
		xmit_fastpath_or_die(ctx, cpu, orig_num);
	else
		xmit_slowpath_or_die(ctx, cpu, orig_num);

	close(sock);

	cleanup_packets();
}

/*
 * Produce a seed for srand(3): sizeof(unsigned int) bytes read from
 * /dev/urandom, or the current time if that device cannot be opened.
 */
static unsigned int generate_srand_seed(void)
{
	unsigned int value;
	int urandom = open("/dev/urandom", O_RDONLY);

	if (urandom >= 0) {
		read_or_die(urandom, &value, sizeof(value));
		close(urandom);
		return value;
	}

	return time(NULL);
}

/*
 * Panic handler registered in main() when --rfraw is used; 'arg' is the
 * device pointer passed to panic_handler_add() and is forwarded to
 * leave_rfmon_mac80211().
 */
static void on_panic_del_rfmon(void *arg)
{
	leave_rfmon_mac80211(arg);
}

/*
 * Program entry point.
 *
 * Parses the command line into a struct ctx, validates device and packet
 * configuration, applies process/system tuning, forks one worker per
 * selected CPU (each running main_loop()), waits for them and prints the
 * accumulated transmit statistics.
 *
 * Forked workers leave through the 'thread_out' label and therefore run
 * the same cleanup as the parent.
 *
 * Returns 0; fatal errors terminate the process via panic()/die().
 */
int main(int argc, char **argv)
{
	bool slow = false, invoke_cpp = false, reseed = true, cpustats = true;
	bool prio_high = false, set_irq_aff = true, set_sock_mem = true;
	int c, opt_index, vals[4] = {0}, irq;
	uint64_t gap = 0;
	unsigned int i;
	char *confname = NULL, *ptr;
	unsigned long cpus_tmp, orig_num = 0;
	unsigned long long tx_packets, tx_bytes;
	struct ctx ctx;
	int min_opts = 5;
	char **cpp_argv = NULL;
	size_t cpp_argc = 0;
	unsigned long long rate;
	enum shaper_type shape_type;
	struct timespec delay;

	fmemset(&ctx, 0, sizeof(ctx));
	ctx.cpus = get_number_cpus_online();
	ctx.uid = getuid();
	ctx.gid = getgid();
	ctx.qdisc_path = false;

	/* Keep an initial small default size to reduce cache-misses. */
	ctx.reserve_size = 512 * (1 << 10);

	while ((c = getopt_long(argc, argv, short_options, long_options,
				&opt_index)) != EOF) {
		switch (c) {
		case 'h':
			help();
			break;
		case 'v':
			version();
			break;
		case 'C':
			cpustats = false;
			break;
		case 'e':
			example();
			break;
		case 'p':
			invoke_cpp = true;
			break;
		case 'D':
			cpp_argv = argv_insert(cpp_argv, &cpp_argc, "-D");
			cpp_argv = argv_insert(cpp_argv, &cpp_argc, optarg);
			break;
		case 'V':
			ctx.verbose = true;
			break;
		case 'P':
			/* Only accepted if it lowers the number of workers. */
			cpus_tmp = strtoul(optarg, NULL, 0);
			if (cpus_tmp > 0 && cpus_tmp < ctx.cpus)
				ctx.cpus = cpus_tmp;
			break;
		case 'd':
		case 'o':
			ctx.device = xstrndup(optarg, IFNAMSIZ);
			break;
		case 'H':
			prio_high = true;
			break;
		case 'A':
			set_sock_mem = false;
			break;
		case 'Q':
			set_irq_aff = false;
			break;
		case 'q':
			ctx.qdisc_path = true;
			break;
		case 'r':
			ctx.rand = true;
			break;
		case 's':
			/* Smoke testing is done by a single slow-path worker. */
			slow = true;
			ctx.cpus = 1;
			ctx.smoke_test = true;
			ctx.rhost = xstrdup(optarg);
			break;
		case 'R':
			ctx.rfraw = true;
			break;
		case 'J':
			ctx.jumbo_support = true;
			break;
		case 'c':
		case 'i':
			confname = xstrdup(optarg);
			/* A configuration read from stdin ("-") is limited to
			 * a single worker.
			 */
			if (!strncmp("-", confname, strlen("-")))
				ctx.cpus = 1;
			break;
		case 'u':
			ctx.uid = strtoul(optarg, NULL, 0);
			ctx.enforce = true;
			break;
		case 'g':
			ctx.gid = strtoul(optarg, NULL, 0);
			ctx.enforce = true;
			break;
		case 'k':
			printf("Option -k/--kernel-pull is no longer used and "
			       "will be removed in a future release!\n");
			break;
		case 'E':
			seed = strtoul(optarg, NULL, 0);
			reseed = false;
			break;
		case 'n':
			orig_num = strtoul(optarg, NULL, 0);
			ctx.num = orig_num;
			break;
		case 't':
			gap = strtoul(optarg, &ptr, 0);
			if (!gap && optarg == ptr)
				panic("Invalid gap param\n");

			if (!strncmp(ptr, "ns", strlen("ns"))) {
				delay.tv_sec = gap / 1000000000;
				delay.tv_nsec = gap % 1000000000;
			} else if (*ptr == '\0' || !strncmp(ptr, "us", strlen("us"))) {
				/*  Default to microseconds for backwards
				 *  compatibility if no postfix is given.
				 */
				delay.tv_sec = gap / 1000000;
				delay.tv_nsec = (gap % 1000000) * 1000;
			} else if (!strncmp(ptr, "ms", strlen("ms"))) {
				delay.tv_sec = gap / 1000;
				delay.tv_nsec = (gap % 1000) * 1000000;
			} else if (!strncmp(ptr, "s", strlen("s"))) {
				delay.tv_sec = gap;
				delay.tv_nsec = 0;
			} else {
				panic("Syntax error in time param!\n");
			}

			shaper_set_delay(&ctx.sh, delay.tv_sec, delay.tv_nsec);
			break;
		case 'b':
			rate = strtoul(optarg, &ptr, 0);
			if (!rate || optarg == ptr)
				panic("Invalid rate param\n");

			/* Everything but pps is normalized to bytes. */
			if (strncmp(ptr, "pps", strlen("pps")) == 0) {
				shape_type = SHAPER_PKTS;
			} else if (strncmp(ptr, "B", strlen("B")) == 0) {
				shape_type = SHAPER_BYTES;
			} else if (strncmp(ptr, "kB", strlen("kB")) == 0) {
				shape_type = SHAPER_BYTES;
				rate *= 1000;
			} else if (strncmp(ptr, "MB", strlen("MB")) == 0) {
				shape_type = SHAPER_BYTES;
				rate *= 1000 * 1000;
			} else if (strncmp(ptr, "GB", strlen("GB")) == 0) {
				shape_type = SHAPER_BYTES;
				rate *= 1000 * 1000 * 1000;
			} else if (strncmp(ptr, "kbit", strlen("kbit")) == 0) {
				shape_type = SHAPER_BYTES;
				rate *= 1000 / 8;
			} else if (strncmp(ptr, "Mbit", strlen("Mbit")) == 0) {
				shape_type = SHAPER_BYTES;
				rate *= 1000 * 1000 / 8;
			} else if (strncmp(ptr, "Gbit", strlen("Gbit")) == 0) {
				shape_type = SHAPER_BYTES;
				rate *= 1000 * 1000 * 1000 / 8;
			} else if (strncmp(ptr, "KiB", strlen("KiB")) == 0) {
				shape_type = SHAPER_BYTES;
				rate *= 1 << 10;
			} else if (strncmp(ptr, "MiB", strlen("MiB")) == 0) {
				shape_type = SHAPER_BYTES;
				rate *= 1 << 20;
			} else if (strncmp(ptr, "GiB", strlen("GiB")) == 0) {
				shape_type = SHAPER_BYTES;
				rate *= 1 << 30;
			} else {
				panic("Invalid unit type for rate\n");
			}

			shaper_set_rate(&ctx.sh, rate, shape_type);
			break;
		case 'S':
			ctx.reserve_size = strtoul(optarg, &ptr, 0);
			if (ctx.reserve_size == 0 && ptr == optarg)
				panic("Invalid ring size param\n");

			/* Scale the parsed number by its unit suffix. */
			if (!strncmp(ptr, "KiB", strlen("KiB")))
				ctx.reserve_size *= 1 << 10;
			else if (!strncmp(ptr, "MiB", strlen("MiB")))
				ctx.reserve_size *= 1 << 20;
			else if (!strncmp(ptr, "GiB", strlen("GiB")))
				ctx.reserve_size *= 1 << 30;
			else
				panic("Invalid ring size unit type\n");

			break;
		case '?':
			/* Options that take an argument get a dedicated
			 * "missing argument" message.
			 */
			switch (optopt) {
			case 'd':
			case 'c':
			case 'n':
			case 'S':
			case 's':
			case 'P':
			case 'o':
			case 'E':
			case 'i':
			case 'k':
			case 'u':
			case 'g':
			case 't':
			case 'b':
			case 'D':
				panic("Option -%c requires an argument!\n",
				      optopt);
			default:
				if (isprint(optopt))
					printf("Unknown option character `0x%X\'!\n", optopt);
				die();
			}
		default:
			break;
		}
	}

	/* NOTE(review): argc >= optind always holds after getopt_long(), so
	 * min_opts is always lowered to 4; this relies on argv2str()
	 * yielding NULL when no packet string follows the options - confirm.
	 */
	if (argc >= optind) {
		min_opts = 4;
		ctx.packet_str = argv2str(optind, argc, argv);
	}

	if (argc < min_opts)
		help();
	if (ctx.device == NULL)
		panic("No networking device given!\n");
	if (confname == NULL && !ctx.packet_str)
		panic("No configuration file or packet string given!\n");
	if (device_mtu(ctx.device) == 0)
		panic("This is no networking device!\n");

	register_signal(SIGINT, signal_handler);
	register_signal(SIGQUIT, signal_handler);
	register_signal(SIGTERM, signal_handler);
	register_signal(SIGHUP, signal_handler);

	protos_init(ctx.device);

	if (prio_high) {
		set_proc_prio(-20);
		set_sched_status(SCHED_FIFO, sched_get_priority_max(SCHED_FIFO));
	}

	if (set_sock_mem)
		set_system_socket_memory(vals, array_size(vals));
	xlockme();

	if (ctx.rfraw) {
		/* Keep the original name; ctx.device becomes the device
		 * handed back by enter_rfmon_mac80211().
		 */
		ctx.device_trans = xstrdup(ctx.device);
		xfree(ctx.device);

		enter_rfmon_mac80211(ctx.device_trans, &ctx.device);
		panic_handler_add(on_panic_del_rfmon, ctx.device);
		sleep(0);
	}

	if (shaper_is_set(&ctx.sh)) {
		prctl(PR_SET_TIMERSLACK, 1UL);
		/* Fall back to single core to not mess up correct timing.
		 * We are slow anyway!
		 */
		ctx.cpus = 1;
		slow = true;
	}

	/*
	 * If number of packets is smaller than number of CPUs use only as
	 * many CPUs as there are packets. Otherwise we end up sending more
	 * packets than intended or none at all.
	 */
	if (ctx.num)
		ctx.cpus = min_t(unsigned int, ctx.num, ctx.cpus);

	irq = device_irq_number(ctx.device);
	if (set_irq_aff)
		device_set_irq_affinity_list(irq, 0, ctx.cpus - 1);

	stats = setup_shared_var(ctx.cpus);

	for (i = 0; i < ctx.cpus; i++) {
		pid_t pid = fork();

		switch (pid) {
		case 0:
			/* Worker: seed the PRNG, pin to CPU i and transmit. */
			if (reseed)
				seed = generate_srand_seed();
			srand(seed);

			cpu_affinity(i);
			main_loop(&ctx, confname, slow, i, invoke_cpp,
				  cpp_argv, orig_num);

			goto thread_out;
		case -1:
			panic("Cannot fork processes!\n");
		}
	}

	for (i = 0; i < ctx.cpus; i++) {
		int status;

		wait(&status);
		if (WEXITSTATUS(status) == EXIT_FAILURE)
			die();
	}

	if (ctx.rfraw)
		leave_rfmon_mac80211(ctx.device);

	if (set_sock_mem)
		reset_system_socket_memory(vals, array_size(vals));

	for (i = 0, tx_packets = tx_bytes = 0; i < ctx.cpus; i++) {
		while ((__get_state(i) & CPU_STATS_STATE_RES) == 0)
			sched_yield();

		tx_packets += stats[i].tx_packets;
		tx_bytes   += stats[i].tx_bytes;
	}

	fflush(stdout);
	printf("\n");
	printf("\r%12llu packets outgoing\n", tx_packets);
	printf("\r%12llu bytes outgoing\n", tx_bytes);
	for (i = 0; cpustats && i < ctx.cpus; i++) {
		printf("\r%12lu sec, %lu usec on CPU%d (%llu packets)\n",
		       stats[i].tv_sec, stats[i].tv_usec, i,
		       stats[i].tx_packets);
	}

thread_out:
	xunlockme();
	destroy_shared_var(stats, ctx.cpus);
	if (set_irq_aff)
		device_restore_irq_affinity_list();

	argv_free(cpp_argv);
	free(ctx.device);
	free(ctx.device_trans);
	free(ctx.rhost);
	free(confname);
	free(ctx.packet_str);

	return 0;
}