/*
 * netsniff-ng - the packet sniffing beast
 * Copyright 2011 - 2013 Daniel Borkmann.
 * Subject to the GPL, version 2.
 */

#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/uio.h>

#include "pcap_io.h"
#include "xmalloc.h"
#include "xio.h"
#include "xutils.h"
#include "built_in.h"

/* Scatter/gather buffers: packets are staged here and moved to/from disk
 * with a single readv(2)/writev(2) covering all 1024 slots. */
static struct iovec iov[1024] __cacheline_aligned;
static off_t iov_off_rd = 0, iov_slot = 0;

/* Queue the pcap header plus packet payload into the current slot; flush
 * the whole iovec array with one writev(2) once all slots are in use. */
static ssize_t pcap_sg_write(int fd, pcap_pkthdr_t *phdr, enum pcap_type type,
			     const uint8_t *packet, size_t len)
{
	ssize_t ret, hdrsize = pcap_get_hdr_length(phdr, type);

	if (unlikely(iov_slot == array_size(iov))) {
		ret = writev(fd, iov, array_size(iov));
		if (ret < 0)
			panic("Writev I/O error: %s!\n", strerror(errno));

		iov_slot = 0;
	}

	fmemcpy(iov[iov_slot].iov_base, &phdr->raw, hdrsize);
	iov[iov_slot].iov_len = hdrsize;

	fmemcpy(iov[iov_slot].iov_base + iov[iov_slot].iov_len, packet, len);
	ret = (iov[iov_slot].iov_len += len);
	iov_slot++;

	return ret;
}

/* Read a pcap header that straddles two iovec slots, refilling the whole
 * array via readv(2) when the last slot has been consumed. */
static ssize_t __pcap_sg_inter_iov_hdr_read(int fd, pcap_pkthdr_t *phdr,
					    enum pcap_type type,
					    uint8_t *packet, size_t len,
					    size_t hdrsize)
{
	int ret;
	size_t offset = 0;
	ssize_t remainder;

	offset = iov[iov_slot].iov_len - iov_off_rd;
	remainder = hdrsize - offset;
	if (remainder < 0)
		remainder = 0;

	bug_on(offset + remainder != hdrsize);

	fmemcpy(&phdr->raw, iov[iov_slot].iov_base + iov_off_rd, offset);

	iov_off_rd = 0;
	iov_slot++;

	if (iov_slot == array_size(iov)) {
		iov_slot = 0;
		ret = readv(fd, iov, array_size(iov));
		if (unlikely(ret <= 0))
			return -EIO;
	}

	fmemcpy(&phdr->raw + offset, iov[iov_slot].iov_base + iov_off_rd,
		remainder);
	iov_off_rd += remainder;

	return hdrsize;
}

/* Same as above, but for packet payload crossing a slot boundary. */
static ssize_t __pcap_sg_inter_iov_data_read(int fd, uint8_t *packet,
					     size_t len, size_t hdrlen)
{
	int ret;
	size_t offset = 0;
	ssize_t remainder;

	offset = iov[iov_slot].iov_len - iov_off_rd;
	remainder = hdrlen - offset;
	if (remainder < 0)
		remainder = 0;

	bug_on(offset + remainder != hdrlen);

	fmemcpy(packet, iov[iov_slot].iov_base + iov_off_rd, offset);

	iov_off_rd = 0;
	iov_slot++;

	if (iov_slot == array_size(iov)) {
		iov_slot = 0;
		ret = readv(fd, iov, array_size(iov));
		if (unlikely(ret <= 0))
			return -EIO;
	}

	fmemcpy(packet + offset, iov[iov_slot].iov_base + iov_off_rd,
		remainder);
	iov_off_rd += remainder;

	return hdrlen;
}

static ssize_t pcap_sg_read(int fd, pcap_pkthdr_t *phdr, enum pcap_type type,
			    uint8_t *packet, size_t len)
{
	ssize_t ret = 0;
	size_t hdrsize = pcap_get_hdr_length(phdr, type), hdrlen;

	if (likely(iov[iov_slot].iov_len - iov_off_rd >= hdrsize)) {
		fmemcpy(&phdr->raw, iov[iov_slot].iov_base + iov_off_rd,
			hdrsize);
		iov_off_rd += hdrsize;
	} else {
		ret = __pcap_sg_inter_iov_hdr_read(fd, phdr, type, packet,
						   len, hdrsize);
		if (unlikely(ret < 0))
			return ret;
	}

	hdrlen = pcap_get_length(phdr, type);
	if (unlikely(hdrlen == 0 || hdrlen > len))
		return -EINVAL;

	if (likely(iov[iov_slot].iov_len - iov_off_rd >= hdrlen)) {
		fmemcpy(packet, iov[iov_slot].iov_base + iov_off_rd, hdrlen);
		iov_off_rd += hdrlen;
	} else {
		ret = __pcap_sg_inter_iov_data_read(fd, packet, len, hdrlen);
		if (unlikely(ret < 0))
			return ret;
	}

	return hdrsize + hdrlen;
}

/* Drain the partially filled iovec array and force data out to disk. */
static void pcap_sg_fsync(int fd)
{
	ssize_t ret = writev(fd, iov, iov_slot);
	if (ret < 0)
		panic("Writev I/O error: %s!\n", strerror(errno));

	iov_slot = 0;
	fdatasync(fd);
}

static void pcap_sg_init_once(void)
{
	set_ioprio_rt();
}

/* Allocate the per-slot buffers; in read mode, prefill them with the
 * first readv(2) burst from the capture file. */
static int pcap_sg_prepare_access(int fd, enum pcap_mode mode, bool jumbo)
{
	int i, ret;
	size_t len = 0;

	iov_slot = 0;
	len = jumbo ? (PAGE_SIZE * 16) /* 64k max */ :
		      (PAGE_SIZE * 3) /* 12k max */;

	for (i = 0; i < array_size(iov); ++i) {
		iov[i].iov_base = xzmalloc_aligned(len, 64);
		iov[i].iov_len = len;
	}

	if (mode == PCAP_MODE_RD) {
		ret = readv(fd, iov, array_size(iov));
		if (ret <= 0)
			return -EIO;

		iov_off_rd = 0;
		iov_slot = 0;
	}

	return 0;
}

static void pcap_sg_prepare_close(int fd, enum pcap_mode mode)
{
	int i;

	for (i = 0; i < array_size(iov); ++i)
		xfree(iov[i].iov_base);
}

const struct pcap_file_ops pcap_sg_ops = {
	.init_once_pcap = pcap_sg_init_once,
	.pull_fhdr_pcap = pcap_generic_pull_fhdr,
	.push_fhdr_pcap = pcap_generic_push_fhdr,
	.prepare_access_pcap = pcap_sg_prepare_access,
	.prepare_close_pcap = pcap_sg_prepare_close,
	.read_pcap = pcap_sg_read,
	.write_pcap = pcap_sg_write,
	.fsync_pcap = pcap_sg_fsync,
};
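
The write path above amortizes syscall cost by batching: each record is
copied into one of 1024 preallocated iovec slots, and a single writev(2)
flushes the whole batch. The stand-alone sketch below demonstrates that
batching pattern in isolation; the slot count, buffer size, and file name
are illustrative and not taken from netsniff-ng, and unlike
pcap_sg_write() it stores exactly one record per slot.

/* Minimal sketch of iovec write batching: append records into a fixed
 * iovec array, flush them all with one writev(2) when the array fills. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/uio.h>

#define SLOTS	8	/* netsniff-ng uses 1024 slots */
#define SLOTLEN	4096	/* per-slot buffer size in this sketch */

static struct iovec iov[SLOTS];
static size_t slot;

static void flush_iov(int fd, size_t n)
{
	/* One syscall writes up to n buffers back-to-back. */
	if (writev(fd, iov, n) < 0) {
		perror("writev");
		exit(1);
	}
	slot = 0;	/* slots can now be reused */
}

static void append_record(int fd, const void *buf, size_t len)
{
	if (slot == SLOTS)
		flush_iov(fd, SLOTS);

	memcpy(iov[slot].iov_base, buf, len);
	iov[slot].iov_len = len;
	slot++;
}

int main(void)
{
	int i, fd = open("demo.bin", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	char rec[64];

	if (fd < 0) {
		perror("open");
		return 1;
	}
	for (i = 0; i < SLOTS; i++)
		iov[i].iov_base = malloc(SLOTLEN);

	for (i = 0; i < 100; i++) {
		int n = snprintf(rec, sizeof(rec), "record %d\n", i);
		append_record(fd, rec, n);
	}
	flush_iov(fd, slot);	/* drain the partial batch, like pcap_sg_fsync() */

	for (i = 0; i < SLOTS; i++)
		free(iov[i].iov_base);
	close(fd);
	return 0;
}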
lib/stackdepot.c: bump stackdepot capacity from 16MB to 128MB
KASAN uses stackdepot to memorize stacks for all kmalloc/kfree calls. The current stackdepot capacity is 16MB (1024 top-level entries x 4 pages on the second level). The size of each stack is (num_frames + 3) * sizeof(long), which gives us ~84K stacks. This capacity was chosen empirically, and it is enough to run the kernel normally. However, when lots of configs are enabled and a fuzzer tries to maximize code coverage, it easily hits the limit within tens of minutes.

I've tested for a long time with the number of top-level entries bumped 4x (4096), and I think I've seen an overflow only once. But I don't have all configs enabled and code coverage has not reached its maximum yet, so bump it 8x to 8192. Since we have a two-level table, the memory cost of this is very moderate: currently the top-level table is 8KB, with this patch it is 64KB, which is negligible under KASAN.

Here is some approximate math. 128MB allows us to memorize ~670K stacks (assuming a stack is ~200 bytes). I've grepped the kernel for kmalloc|kfree|kmem_cache_alloc|kmem_cache_free|kzalloc|kstrdup|kstrndup|kmemdup, and it gives ~60K matches. Most alloc/free call sites are reachable with only one stack, but some utility functions can have large fanout. Assuming an average fanout of 5x, the total number of alloc/free stacks is ~300K.

Link: http://lkml.kernel.org/r/1476458416-122131-1-git-send-email-dvyukov@google.com
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Baozeng Ding <sploving1@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
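
As a sanity check of the numbers above, here is the arithmetic spelled out as a small stand-alone program. The constants mirror the description in the message (4-page second-level slabs, 1024 slabs before the patch, 8192 after, ~200-byte average stack); this is a sketch of the math, not code from lib/stackdepot.c.

/* Back-of-the-envelope check of the capacity numbers in the message. */
#include <stdio.h>

#define PAGE_SIZE		4096UL
#define STACK_ALLOC_ORDER	2	/* 4 pages per second-level slab */
#define SLAB_SIZE		(PAGE_SIZE << STACK_ALLOC_ORDER)	/* 16KB */

int main(void)
{
	unsigned long old_slabs = 1024, new_slabs = 8192;
	unsigned long avg_stack = 200;	/* ~(num_frames + 3) * sizeof(long) */

	/* 1024 * 16KB = 16MB -> ~84K stacks; 8192 * 16KB = 128MB -> ~670K */
	printf("old capacity: %lu MB (~%lu stacks)\n",
	       old_slabs * SLAB_SIZE >> 20, old_slabs * SLAB_SIZE / avg_stack);
	printf("new capacity: %lu MB (~%lu stacks)\n",
	       new_slabs * SLAB_SIZE >> 20, new_slabs * SLAB_SIZE / avg_stack);

	/* top-level table of slab pointers: 8KB before, 64KB after */
	printf("top-level table: %lu KB -> %lu KB\n",
	       old_slabs * sizeof(void *) >> 10,
	       new_slabs * sizeof(void *) >> 10);
	return 0;
}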