/* * Copyright (c) 2000-2001 Christoph Hellwig. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL"). * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Veritas filesystem driver - object location table support. */ #include #include #include #include "vxfs.h" #include "vxfs_olt.h" #include "vxfs_extern.h" static inline void vxfs_get_fshead(struct vxfs_oltfshead *fshp, struct vxfs_sb_info *infp) { BUG_ON(infp->vsi_fshino); infp->vsi_fshino = fs32_to_cpu(infp, fshp->olt_fsino[0]); } static inline void vxfs_get_ilist(struct vxfs_oltilist *ilistp, struct vxfs_sb_info *infp) { BUG_ON(infp->vsi_iext); infp->vsi_iext = fs32_to_cpu(infp, ilistp->olt_iext[0]); } static inline u_long vxfs_oblock(struct super_block *sbp, daddr_t block, u_long bsize) { BUG_ON(sbp->s_blocksize % bsize); return (block * (sbp->s_blocksize / bsize)); } /** * vxfs_read_olt - read olt * @sbp: superblock of the filesystem * @bsize: blocksize of the filesystem * * Description: * vxfs_read_olt reads the olt of the filesystem described by @sbp * into main memory and does some basic setup. * * Returns: * Zero on success, else a negative error code. */ int vxfs_read_olt(struct super_block *sbp, u_long bsize) { struct vxfs_sb_info *infp = VXFS_SBI(sbp); struct buffer_head *bp; struct vxfs_olt *op; char *oaddr, *eaddr; bp = sb_bread(sbp, vxfs_oblock(sbp, infp->vsi_oltext, bsize)); if (!bp || !bp->b_data) goto fail; op = (struct vxfs_olt *)bp->b_data; if (fs32_to_cpu(infp, op->olt_magic) != VXFS_OLT_MAGIC) { printk(KERN_NOTICE "vxfs: ivalid olt magic number\n"); goto fail; } /* * It is in theory possible that vsi_oltsize is > 1. * I've not seen any such filesystem yet and I'm lazy.. --hch */ if (infp->vsi_oltsize > 1) { printk(KERN_NOTICE "vxfs: oltsize > 1 detected.\n"); printk(KERN_NOTICE "vxfs: please notify hch@infradead.org\n"); goto fail; } oaddr = bp->b_data + fs32_to_cpu(infp, op->olt_size); eaddr = bp->b_data + (infp->vsi_oltsize * sbp->s_blocksize); while (oaddr < eaddr) { struct vxfs_oltcommon *ocp = (struct vxfs_oltcommon *)oaddr; switch (fs32_to_cpu(infp, ocp->olt_type)) { case VXFS_OLT_FSHEAD: vxfs_get_fshead((struct vxfs_oltfshead *)oaddr, infp); break; case VXFS_OLT_ILIST: vxfs_get_ilist((struct vxfs_oltilist *)oaddr, infp); break; } oaddr += fs32_to_cpu(infp, ocp->olt_size); } brelse(bp); return (infp->vsi_fshino && infp->vsi_iext) ? 0 : -EINVAL; fail: brelse(bp); return -EINVAL; } mode:
authorMaxim Patlasov <mpatlasov@virtuozzo.com>2016-12-12 14:32:44 -0800
committerChris Mason <clm@fb.com>2016-12-13 11:01:30 -0800
commit2939e1a86f758b55cdba73e29397dd3d94df13bc (patch)
tree6024381b6fe04ed125c7bb6b613d3c3d3bf2b983 /net/nfc
parent5f52a2c512a55500349aa261e469d099ede0f256 (diff)
btrfs: limit async_work allocation and worker func duration
Problem statement: unprivileged user who has read-write access to more than one btrfs subvolume may easily consume all kernel memory (eventually triggering oom-killer). Reproducer (./mkrmdir below essentially loops over mkdir/rmdir): [root@kteam1 ~]# cat prep.sh DEV=/dev/sdb mkfs.btrfs -f $DEV mount $DEV /mnt for i in `seq 1 16` do mkdir /mnt/$i btrfs subvolume create /mnt/SV_$i ID=`btrfs subvolume list /mnt |grep "SV_$i$" |cut -d ' ' -f 2` mount -t btrfs -o subvolid=$ID $DEV /mnt/$i chmod a+rwx /mnt/$i done [root@kteam1 ~]# sh prep.sh [maxim@kteam1 ~]$ for i in `seq 1 16`; do ./mkrmdir /mnt/$i 2000 2000 & done [root@kteam1 ~]# for i in `seq 1 4`; do grep "kmalloc-128" /proc/slabinfo | grep -v dma; sleep 60; done kmalloc-128 10144 10144 128 32 1 : tunables 0 0 0 : slabdata 317 317 0 kmalloc-128 9992352 9992352 128 32 1 : tunables 0 0 0 : slabdata 312261 312261 0 kmalloc-128 24226752 24226752 128 32 1 : tunables 0 0 0 : slabdata 757086 757086 0 kmalloc-128 42754240 42754240 128 32 1 : tunables 0 0 0 : slabdata 1336070 1336070 0 The huge numbers above come from insane number of async_work-s allocated and queued by btrfs_wq_run_delayed_node. The problem is caused by btrfs_wq_run_delayed_node() queuing more and more works if the number of delayed items is above BTRFS_DELAYED_BACKGROUND. The worker func (btrfs_async_run_delayed_root) processes at least BTRFS_DELAYED_BATCH items (if they are present in the list). So, the machinery works as expected while the list is almost empty. As soon as it is getting bigger, worker func starts to process more than one item at a time, it takes longer, and the chances to have async_works queued more than needed is getting higher. The problem above is worsened by another flaw of delayed-inode implementation: if async_work was queued in a throttling branch (number of items >= BTRFS_DELAYED_WRITEBACK), corresponding worker func won't quit until the number of items < BTRFS_DELAYED_BACKGROUND / 2. So, it is possible that the func occupies CPU infinitely (up to 30sec in my experiments): while the func is trying to drain the list, the user activity may add more and more items to the list. The patch fixes both problems in straightforward way: refuse queuing too many works in btrfs_wq_run_delayed_node and bail out of worker func if at least BTRFS_DELAYED_WRITEBACK items are processed. Changed in v2: remove support of thresh == NO_THRESHOLD. Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com> Signed-off-by: Chris Mason <clm@fb.com> Cc: stable@vger.kernel.org # v3.15+
Diffstat (limited to 'net/nfc')