/* * netsniff-ng - the packet sniffing beast * Copyright 2011 - 2013 Daniel Borkmann. * Subject to the GPL, version 2. */ #include #include #include #include #include #include #include "pcap_io.h" #include "xmalloc.h" #include "built_in.h" #include "iosched.h" #include "ioops.h" static struct iovec iov[1024] __cacheline_aligned; static off_t iov_off_rd = 0, iov_slot = 0; static ssize_t pcap_sg_write(int fd, pcap_pkthdr_t *phdr, enum pcap_type type, const uint8_t *packet, size_t len) { ssize_t ret, hdrsize = pcap_get_hdr_length(phdr, type); if (unlikely(iov_slot == array_size(iov))) { ret = writev(fd, iov, array_size(iov)); if (ret < 0) panic("Writev I/O error: %s!\n", strerror(errno)); iov_slot = 0; } fmemcpy(iov[iov_slot].iov_base, &phdr->raw, hdrsize); iov[iov_slot].iov_len = hdrsize; fmemcpy(iov[iov_slot].iov_base + iov[iov_slot].iov_len, packet, len); ret = (iov[iov_slot].iov_len += len); iov_slot++; return ret; } static ssize_t __pcap_sg_inter_iov_hdr_read(int fd, pcap_pkthdr_t *phdr, size_t hdrsize) { int ret; size_t offset = 0; ssize_t remainder; offset = iov[iov_slot].iov_len - iov_off_rd; remainder = hdrsize - offset; if (remainder < 0) remainder = 0; bug_on(offset + remainder != hdrsize); fmemcpy(&phdr->raw, iov[iov_slot].iov_base + iov_off_rd, offset); iov_off_rd = 0; iov_slot++; if (iov_slot == array_size(iov)) { iov_slot = 0; ret = readv(fd, iov, array_size(iov)); if (unlikely(ret <= 0)) return -EIO; } fmemcpy(&phdr->raw + offset, iov[iov_slot].iov_base + iov_off_rd, remainder); iov_off_rd += remainder; return hdrsize; } static ssize_t __pcap_sg_inter_iov_data_read(int fd, uint8_t *packet, size_t hdrlen) { int ret; size_t offset = 0; ssize_t remainder; offset = iov[iov_slot].iov_len - iov_off_rd; remainder = hdrlen - offset; if (remainder < 0) remainder = 0; bug_on(offset + remainder != hdrlen); fmemcpy(packet, iov[iov_slot].iov_base + iov_off_rd, offset); iov_off_rd = 0; iov_slot++; if (iov_slot == array_size(iov)) { iov_slot = 0; ret = readv(fd, iov, array_size(iov)); if (unlikely(ret <= 0)) return -EIO; } fmemcpy(packet + offset, iov[iov_slot].iov_base + iov_off_rd, remainder); iov_off_rd += remainder; return hdrlen; } static ssize_t pcap_sg_read(int fd, pcap_pkthdr_t *phdr, enum pcap_type type, uint8_t *packet, size_t len) { ssize_t ret = 0; size_t hdrsize = pcap_get_hdr_length(phdr, type), hdrlen; if (likely(iov[iov_slot].iov_len - iov_off_rd >= hdrsize)) { fmemcpy(&phdr->raw, iov[iov_slot].iov_base + iov_off_rd, hdrsize); iov_off_rd += hdrsize; } else { ret = __pcap_sg_inter_iov_hdr_read(fd, phdr, hdrsize); if (unlikely(ret < 0)) return ret; } hdrlen = pcap_get_length(phdr, type); if (unlikely(hdrlen == 0 || hdrlen > len)) return -EINVAL; if (likely(iov[iov_slot].iov_len - iov_off_rd >= hdrlen)) { fmemcpy(packet, iov[iov_slot].iov_base + iov_off_rd, hdrlen); iov_off_rd += hdrlen; } else { ret = __pcap_sg_inter_iov_data_read(fd, packet, hdrlen); if (unlikely(ret < 0)) return ret; } return hdrsize + hdrlen; } static void pcap_sg_fsync(int fd) { ssize_t ret = writev(fd, iov, iov_slot); if (ret < 0) panic("Writev I/O error: %s!\n", strerror(errno)); iov_slot = 0; fdatasync(fd); } static void pcap_sg_init_once(bool enforce_prio) { if (enforce_prio) set_ioprio_rt(); } static int pcap_sg_prepare_access(int fd, enum pcap_mode mode, bool jumbo) { int ret; size_t i, len = 0; iov_slot = 0; len = jumbo ? (RUNTIME_PAGE_SIZE * 16) /* 64k max */ : (RUNTIME_PAGE_SIZE * 3) /* 12k max */; for (i = 0; i < array_size(iov); ++i) { iov[i].iov_base = xzmalloc_aligned(len, 64); iov[i].iov_len = len; } if (mode == PCAP_MODE_RD) { ret = readv(fd, iov, array_size(iov)); if (ret <= 0) return -EIO; iov_off_rd = 0; iov_slot = 0; } return 0; } static void pcap_sg_prepare_close(int fd __maybe_unused, enum pcap_mode mode __maybe_unused) { size_t i; for (i = 0; i < array_size(iov); ++i) xfree(iov[i].iov_base); } const struct pcap_file_ops pcap_sg_ops = { .init_once_pcap = pcap_sg_init_once, .pull_fhdr_pcap = pcap_generic_pull_fhdr, .push_fhdr_pcap = pcap_generic_push_fhdr, .prepare_access_pcap = pcap_sg_prepare_access, .prepare_close_pcap = pcap_sg_prepare_close, .read_pcap = pcap_sg_read, .write_pcap = pcap_sg_write, .fsync_pcap = pcap_sg_fsync, }; n='2' class='oid'>e9300a4b7bbae83af1f7703938c94cf6dc6d308f (patch) tree6e5ad424acbba79a91abc112557430e3ff570449 parent667121ace9dbafb368618dbabcf07901c962ddac (diff)
firewire: net: fix fragmented datagram_size off-by-one
RFC 2734 defines the datagram_size field in fragment encapsulation headers thus: datagram_size: The encoded size of the entire IP datagram. The value of datagram_size [...] SHALL be one less than the value of Total Length in the datagram's IP header (see STD 5, RFC 791). Accordingly, the eth1394 driver of Linux 2.6.36 and older set and got this field with a -/+1 offset: ether1394_tx() /* transmit */ ether1394_encapsulate_prep() hdr->ff.dg_size = dg_size - 1; ether1394_data_handler() /* receive */ if (hdr->common.lf == ETH1394_HDR_LF_FF) dg_size = hdr->ff.dg_size + 1; else dg_size = hdr->sf.dg_size + 1; Likewise, I observe OS X 10.4 and Windows XP Pro SP3 to transmit 1500 byte sized datagrams in fragments with datagram_size=1499 if link fragmentation is required. Only firewire-net sets and gets datagram_size without this offset. The result is lacking interoperability of firewire-net with OS X, Windows XP, and presumably Linux' eth1394. (I did not test with the latter.) For example, FTP data transfers to a Linux firewire-net box with max_rec smaller than the 1500 bytes MTU - from OS X fail entirely, - from Win XP start out with a bunch of fragmented datagrams which time out, then continue with unfragmented datagrams because Win XP temporarily reduces the MTU to 576 bytes. So let's fix firewire-net's datagram_size accessors. Note that firewire-net thereby loses interoperability with unpatched firewire-net, but only if link fragmentation is employed. (This happens with large broadcast datagrams, and with large datagrams on several FireWire CardBus cards with smaller max_rec than equivalent PCI cards, and it can be worked around by setting a small enough MTU.) Cc: stable@vger.kernel.org Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>