/*
 * netsniff-ng - the packet sniffing beast
 * Copyright 2011 - 2013 Daniel Borkmann.
 * Subject to the GPL, version 2.
 */

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/uio.h>

#include "pcap_io.h"
#include "xmalloc.h"
#include "built_in.h"
#include "iosched.h"
#include "ioops.h"

/* Scatter-gather buffers: packet records are staged here and drained
 * to/from disk in batches via readv(2)/writev(2).
 */
static struct iovec iov[1024] __cacheline_aligned;
static off_t iov_off_rd = 0, iov_slot = 0;

static ssize_t pcap_sg_write(int fd, pcap_pkthdr_t *phdr, enum pcap_type type,
			     const uint8_t *packet, size_t len)
{
	ssize_t ret, hdrsize = pcap_get_hdr_length(phdr, type);

	/* All slots staged: flush the whole array with one syscall */
	if (unlikely(iov_slot == array_size(iov))) {
		ret = writev(fd, iov, array_size(iov));
		if (ret < 0)
			panic("Writev I/O error: %s!\n", strerror(errno));

		iov_slot = 0;
	}

	/* One record (pcap header + packet data) per iovec slot */
	fmemcpy(iov[iov_slot].iov_base, &phdr->raw, hdrsize);
	iov[iov_slot].iov_len = hdrsize;

	fmemcpy(iov[iov_slot].iov_base + iov[iov_slot].iov_len, packet, len);
	ret = (iov[iov_slot].iov_len += len);
	iov_slot++;

	return ret;
}

static ssize_t __pcap_sg_inter_iov_hdr_read(int fd, pcap_pkthdr_t *phdr,
					    size_t hdrsize)
{
	int ret;
	size_t offset = 0;
	ssize_t remainder;

	/* The pcap header straddles two iovec slots: copy what is left
	 * in the current slot, then the rest from the next one.
	 */
	offset = iov[iov_slot].iov_len - iov_off_rd;
	remainder = hdrsize - offset;
	if (remainder < 0)
		remainder = 0;

	bug_on(offset + remainder != hdrsize);

	fmemcpy(&phdr->raw, iov[iov_slot].iov_base + iov_off_rd, offset);

	iov_off_rd = 0;
	iov_slot++;

	if (iov_slot == array_size(iov)) {
		iov_slot = 0;
		ret = readv(fd, iov, array_size(iov));
		if (unlikely(ret <= 0))
			return -EIO;
	}

	fmemcpy(&phdr->raw + offset, iov[iov_slot].iov_base + iov_off_rd,
		remainder);
	iov_off_rd += remainder;

	return hdrsize;
}

static ssize_t __pcap_sg_inter_iov_data_read(int fd, uint8_t *packet,
					     size_t hdrlen)
{
	int ret;
	size_t offset = 0;
	ssize_t remainder;

	/* Same slot-straddling case as above, for the packet payload */
	offset = iov[iov_slot].iov_len - iov_off_rd;
	remainder = hdrlen - offset;
	if (remainder < 0)
		remainder = 0;

	bug_on(offset + remainder != hdrlen);

	fmemcpy(packet, iov[iov_slot].iov_base + iov_off_rd, offset);

	iov_off_rd = 0;
	iov_slot++;

	if (iov_slot == array_size(iov)) {
		iov_slot = 0;
		ret = readv(fd, iov, array_size(iov));
		if (unlikely(ret <= 0))
			return -EIO;
	}

	fmemcpy(packet + offset, iov[iov_slot].iov_base + iov_off_rd,
		remainder);
	iov_off_rd += remainder;

	return hdrlen;
}

static ssize_t pcap_sg_read(int fd, pcap_pkthdr_t *phdr, enum pcap_type type,
			    uint8_t *packet, size_t len)
{
	ssize_t ret = 0;
	size_t hdrsize = pcap_get_hdr_length(phdr, type), hdrlen;

	/* Fast path: the whole header lies within the current slot */
	if (likely(iov[iov_slot].iov_len - iov_off_rd >= hdrsize)) {
		fmemcpy(&phdr->raw, iov[iov_slot].iov_base + iov_off_rd, hdrsize);
		iov_off_rd += hdrsize;
	} else {
		ret = __pcap_sg_inter_iov_hdr_read(fd, phdr, hdrsize);
		if (unlikely(ret < 0))
			return ret;
	}

	hdrlen = pcap_get_length(phdr, type);
	if (unlikely(hdrlen == 0 || hdrlen > len))
		return -EINVAL;

	if (likely(iov[iov_slot].iov_len - iov_off_rd >= hdrlen)) {
		fmemcpy(packet, iov[iov_slot].iov_base + iov_off_rd, hdrlen);
		iov_off_rd += hdrlen;
	} else {
		ret = __pcap_sg_inter_iov_data_read(fd, packet, hdrlen);
		if (unlikely(ret < 0))
			return ret;
	}

	return hdrsize + hdrlen;
}

static void pcap_sg_fsync(int fd)
{
	/* Drain the partially filled iovec array before syncing */
	ssize_t ret = writev(fd, iov, iov_slot);
	if (ret < 0)
		panic("Writev I/O error: %s!\n", strerror(errno));

	iov_slot = 0;
	fdatasync(fd);
}

static void pcap_sg_init_once(void)
{
	set_ioprio_rt();
}

static int pcap_sg_prepare_access(int fd, enum pcap_mode mode, bool jumbo)
{
	int ret;
	size_t i, len = 0;

	iov_slot = 0;
	len = jumbo ? (RUNTIME_PAGE_SIZE * 16) /* 64k max */ :
		      (RUNTIME_PAGE_SIZE * 3)  /* 12k max */;

	for (i = 0; i < array_size(iov); ++i) {
		iov[i].iov_base = xzmalloc_aligned(len, 64);
		iov[i].iov_len = len;
	}

	if (mode == PCAP_MODE_RD) {
		/* Pre-fill the whole array so reads can start immediately */
		ret = readv(fd, iov, array_size(iov));
		if (ret <= 0)
			return -EIO;

		iov_off_rd = 0;
		iov_slot = 0;
	}

	return 0;
}

static void pcap_sg_prepare_close(int fd __maybe_unused,
				  enum pcap_mode mode __maybe_unused)
{
	size_t i;

	for (i = 0; i < array_size(iov); ++i)
		xfree(iov[i].iov_base);
}

const struct pcap_file_ops pcap_sg_ops = {
	.init_once_pcap = pcap_sg_init_once,
	.pull_fhdr_pcap = pcap_generic_pull_fhdr,
	.push_fhdr_pcap = pcap_generic_push_fhdr,
	.prepare_access_pcap = pcap_sg_prepare_access,
	.prepare_close_pcap = pcap_sg_prepare_close,
	.read_pcap = pcap_sg_read,
	.write_pcap = pcap_sg_write,
	.fsync_pcap = pcap_sg_fsync,
};
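The ops table above stages one pcap record per iovec slot and only touches the disk once the whole array is full, trading per-record syscalls for one large writev(2). The following standalone sketch shows the same gather-write pattern in isolation; it is not netsniff-ng code, and the slot count, record format, and output file name are arbitrary choices for the illustration:

/* Minimal illustration of gather-write batching: records are staged
 * into an iovec array and flushed with a single writev(2) once the
 * array is full, then the tail is drained explicitly at the end.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/uio.h>

#define SLOTS 8

static struct iovec iov[SLOTS];
static int slot;

static void flush(int fd)
{
	if (slot > 0 && writev(fd, iov, slot) < 0) {
		perror("writev");
		exit(1);
	}
	slot = 0;
}

static void stage(int fd, const void *buf, size_t len)
{
	if (slot == SLOTS)
		flush(fd);	/* array full: one syscall drains all slots */
	memcpy(iov[slot].iov_base, buf, len);
	iov[slot].iov_len = len;
	slot++;
}

int main(void)
{
	int i, fd = open("out.bin", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	char record[64];

	if (fd < 0) {
		perror("open");
		return 1;
	}
	for (i = 0; i < SLOTS; ++i)
		iov[i].iov_base = malloc(sizeof(record));
	for (i = 0; i < 100; ++i) {
		int n = snprintf(record, sizeof(record), "record %d\n", i);
		stage(fd, record, (size_t)n);
	}
	flush(fd);	/* drain the partial tail, like pcap_sg_fsync() */
	for (i = 0; i < SLOTS; ++i)
		free(iov[i].iov_base);
	return close(fd);
}

netsniff-ng applies the same idea with 1024 slots and page-sized buffers, which is why pcap_sg_fsync() must flush the partially filled array before calling fdatasync(2).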
fs/super.c: don't fool lockdep in freeze_super() and thaw_super() paths
sb_wait_write()->percpu_rwsem_release() fools lockdep to avoid false positives. Now that xfs has been fixed by Dave's commit dbad7c993053 ("xfs: stop holding ILOCK over filldir callbacks"), we can remove it and change freeze_super() and thaw_super() to run with the s_writers.rw_sem locks held; we add two trivial helpers for that, lockdep_sb_freeze_release() and lockdep_sb_freeze_acquire().

xfstests-dev/check `grep -il freeze tests/*/???` does not trigger any warning from lockdep.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
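Going by that description, the two helpers are thin loops over the per-level s_writers lockdep maps: release them before the freezer blocks, re-acquire them in the thaw path. A sketch of what they plausibly look like, reconstructed from the commit message rather than copied from fs/super.c (the loop bounds and argument details are assumptions):

/* Hand the s_writers.rw_sem lockdep state back before freeze_super()
 * blocks, so lockdep does not see the frozen sb as "locks held forever".
 * Sketch only; the real bodies live in fs/super.c.
 */
static void lockdep_sb_freeze_release(struct super_block *sb)
{
	int level;

	for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
		percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
}

/* Tell lockdep we hold all freeze levels again (thaw_super() path) */
static void lockdep_sb_freeze_acquire(struct super_block *sb)
{
	int level;

	for (level = 0; level < SB_FREEZE_LEVELS; ++level)
		percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
}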
Currently, invalidate_inode_pages2_range() and invalidate_mapping_pages() just delete all exceptional radix tree entries they find. For DAX this is not desirable, as we track cache dirtiness in these entries, and when they are evicted we may fail to flush caches even though it is necessary. This can, for example, manifest when we write to the same block both via mmap and via write(2) (to different offsets) and fsync(2) then does not properly flush CPU caches when the modification via write(2) was the last one.

Create appropriate DAX functions to handle invalidation of DAX entries for invalidate_inode_pages2_range() and invalidate_mapping_pages(), and wire them up into the corresponding mm functions.

Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
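The key design point is that a DAX exceptional entry may only be dropped while it is clean, except on truncate, because the dirty/towrite tags are what later drive CPU cache flushing on fsync(2). A sketch of such a helper, reconstructed from the description against the radix-tree API of that era; the helper name and the get/put_unlocked_mapping_entry() calls are assumptions here, not verified against fs/dax.c:

/* Drop the DAX radix tree entry at @index, but only if it is clean
 * (or if @trunc, i.e. the caller is truncating and the data is going
 * away anyway). Keeping dirty entries preserves the "needs cache
 * flush" state that fsync relies on. Sketch only.
 */
static int __dax_invalidate_mapping_entry(struct address_space *mapping,
					  pgoff_t index, bool trunc)
{
	int ret = 0;
	void *entry;

	spin_lock_irq(&mapping->tree_lock);
	entry = get_unlocked_mapping_entry(mapping, index, NULL);
	if (!entry || !radix_tree_exceptional_entry(entry))
		goto out;
	/* Not truncating: refuse to evict dirty or about-to-be-written
	 * entries, since that would silently lose the flush obligation.
	 */
	if (!trunc &&
	    (radix_tree_tag_get(&mapping->page_tree, index,
				PAGECACHE_TAG_DIRTY) ||
	     radix_tree_tag_get(&mapping->page_tree, index,
				PAGECACHE_TAG_TOWRITE)))
		goto out;
	radix_tree_delete(&mapping->page_tree, index);
	mapping->nrexceptional--;
	ret = 1;
out:
	put_unlocked_mapping_entry(mapping, index, entry);
	spin_unlock_irq(&mapping->tree_lock);
	return ret;
}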