/* * file.c * * Copyright (C) 1995, 1996 by Volker Lendecke * Modified 1997 Peter Waltenberg, Bill Hawes, David Woodhouse for 2.1 dcache * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include #include #include #include #include #include #include #include "ncp_fs.h" static int ncp_fsync(struct file *file, loff_t start, loff_t end, int datasync) { return filemap_write_and_wait_range(file->f_mapping, start, end); } /* * Open a file with the specified read/write mode. */ int ncp_make_open(struct inode *inode, int right) { int error; int access; error = -EINVAL; if (!inode) { pr_err("%s: got NULL inode\n", __func__); goto out; } ncp_dbg(1, "opened=%d, volume # %u, dir entry # %u\n", atomic_read(&NCP_FINFO(inode)->opened), NCP_FINFO(inode)->volNumber, NCP_FINFO(inode)->dirEntNum); error = -EACCES; mutex_lock(&NCP_FINFO(inode)->open_mutex); if (!atomic_read(&NCP_FINFO(inode)->opened)) { struct ncp_entry_info finfo; int result; /* tries max. rights */ finfo.access = O_RDWR; result = ncp_open_create_file_or_subdir(NCP_SERVER(inode), inode, NULL, OC_MODE_OPEN, 0, AR_READ | AR_WRITE, &finfo); if (!result) goto update; /* RDWR did not succeeded, try readonly or writeonly as requested */ switch (right) { case O_RDONLY: finfo.access = O_RDONLY; result = ncp_open_create_file_or_subdir(NCP_SERVER(inode), inode, NULL, OC_MODE_OPEN, 0, AR_READ, &finfo); break; case O_WRONLY: finfo.access = O_WRONLY; result = ncp_open_create_file_or_subdir(NCP_SERVER(inode), inode, NULL, OC_MODE_OPEN, 0, AR_WRITE, &finfo); break; } if (result) { ncp_vdbg("failed, result=%d\n", result); goto out_unlock; } /* * Update the inode information. 
*/ update: ncp_update_inode(inode, &finfo); atomic_set(&NCP_FINFO(inode)->opened, 1); } access = NCP_FINFO(inode)->access; ncp_vdbg("file open, access=%x\n", access); if (access == right || access == O_RDWR) { atomic_inc(&NCP_FINFO(inode)->opened); error = 0; } out_unlock: mutex_unlock(&NCP_FINFO(inode)->open_mutex); out: return error; } static ssize_t ncp_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); size_t already_read = 0; off_t pos = iocb->ki_pos; size_t bufsize; int error; void *freepage; size_t freelen; ncp_dbg(1, "enter %pD2\n", file); if (!iov_iter_count(to)) return 0; if (pos > inode->i_sb->s_maxbytes) return 0; iov_iter_truncate(to, inode->i_sb->s_maxbytes - pos); error = ncp_make_open(inode, O_RDONLY); if (error) { ncp_dbg(1, "open failed, error=%d\n", error); return error; } bufsize = NCP_SERVER(inode)->buffer_size; error = -EIO; freelen = ncp_read_bounce_size(bufsize); freepage = vmalloc(freelen); if (!freepage) goto outrel; error = 0; /* First read in as much as possible for each bufsize. */ while (iov_iter_count(to)) { int read_this_time; size_t to_read = min_t(size_t, bufsize - (pos % bufsize), iov_iter_count(to)); error = ncp_read_bounce(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle, pos, to_read, to, &read_this_time, freepage, freelen); if (error) { error = -EIO; /* NW errno -> Linux errno */ break; } pos += read_this_time; already_read += read_this_time; if (read_this_time != to_read) break; } vfree(freepage); iocb->ki_pos = pos; file_accessed(file); ncp_dbg(1, "exit %pD2\n", file); outrel: ncp_inode_close(inode); return already_read ? 
already_read : error; } static ssize_t ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); size_t already_written = 0; size_t bufsize; int errno; void *bouncebuffer; off_t pos; ncp_dbg(1, "enter %pD2\n", file); errno = generic_write_checks(iocb, from); if (errno <= 0) return errno; errno = ncp_make_open(inode, O_WRONLY); if (errno) { ncp_dbg(1, "open failed, error=%d\n", errno); return errno; } bufsize = NCP_SERVER(inode)->buffer_size; errno = file_update_time(file); if (errno) goto outrel; bouncebuffer = vmalloc(bufsize); if (!bouncebuffer) { errno = -EIO; /* -ENOMEM */ goto outrel; } pos = iocb->ki_pos; while (iov_iter_count(from)) { int written_this_time; size_t to_write = min_t(size_t, bufsize - (pos % bufsize), iov_iter_count(from)); if (!copy_from_iter_full(bouncebuffer, to_write, from)) { errno = -EFAULT; break; } if (ncp_write_kernel(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle, pos, to_write, bouncebuffer, &written_this_time) != 0) { errno = -EIO; break; } pos += written_this_time; already_written += written_this_time; if (written_this_time != to_write) break; } vfree(bouncebuffer); iocb->ki_pos = pos; if (pos > i_size_read(inode)) { inode_lock(inode); if (pos > i_size_read(inode)) i_size_write(inode, pos); inode_unlock(inode); } ncp_dbg(1, "exit %pD2\n", file); outrel: ncp_inode_close(inode); return already_written ? 
already_written : errno; } static int ncp_release(struct inode *inode, struct file *file) { if (ncp_make_closed(inode)) { ncp_dbg(1, "failed to close\n"); } return 0; } const struct file_operations ncp_file_operations = { .llseek = generic_file_llseek, .read_iter = ncp_file_read_iter, .write_iter = ncp_file_write_iter, .unlocked_ioctl = ncp_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ncp_compat_ioctl, #endif .mmap = ncp_mmap, .release = ncp_release, .fsync = ncp_fsync, }; const struct inode_operations ncp_file_inode_operations = { .setattr = ncp_notify_change, }; t/diff/include/trace/events/tlb.h?id=966d2b04e070bc040319aaebfec09e0144dc3341&id2=1b1bc42c1692e9b62756323c675a44cb1a1f9dbd'>diff)
percpu-refcount: fix reference leak during percpu-atomic transition
percpu_ref_tryget() and percpu_ref_tryget_live() should return "true" IFF they acquire a reference. But the return value from atomic_long_inc_not_zero() is a long and may have high bits set, e.g. PERCPU_COUNT_BIAS, and the return value of the tryget routines is bool, so the reference may actually be acquired but the routines return "false", which results in a reference leak since the caller assumes it does not need to do a corresponding percpu_ref_put(). This was seen when performing CPU hotplug during I/O, as hangs in blk_mq_freeze_queue_wait where percpu_ref_kill (blk_mq_freeze_queue_start) raced with percpu_ref_tryget (blk_mq_timeout_work). Sample stack trace: __switch_to+0x2c0/0x450 __schedule+0x2f8/0x970 schedule+0x48/0xc0 blk_mq_freeze_queue_wait+0x94/0x120 blk_mq_queue_reinit_work+0xb8/0x180 blk_mq_queue_reinit_prepare+0x84/0xa0 cpuhp_invoke_callback+0x17c/0x600 cpuhp_up_callbacks+0x58/0x150 _cpu_up+0xf0/0x1c0 do_cpu_up+0x120/0x150 cpu_subsys_online+0x64/0xe0 device_online+0xb4/0x120 online_store+0xb4/0xc0 dev_attr_store+0x68/0xa0 sysfs_kf_write+0x80/0xb0 kernfs_fop_write+0x17c/0x250 __vfs_write+0x6c/0x1e0 vfs_write+0xd0/0x270 SyS_write+0x6c/0x110 system_call+0x38/0xe0 Examination of the queue showed a single reference (no PERCPU_COUNT_BIAS, and __PERCPU_REF_DEAD, __PERCPU_REF_ATOMIC set) and no requests. However, conditions at the time of the race are count of PERCPU_COUNT_BIAS + 0 and __PERCPU_REF_DEAD and __PERCPU_REF_ATOMIC set. The fix is to make the tryget routines use an actual boolean internally instead of the atomic long result truncated to an int. Fixes: e625305b3907 percpu-refcount: make percpu_ref based on longs instead of ints Link: https://bugzilla.kernel.org/show_bug.cgi?id=190751 Signed-off-by: Douglas Miller <dougmill@linux.vnet.ibm.com> Reviewed-by: Jens Axboe <axboe@fb.com> Signed-off-by: Tejun Heo <tj@kernel.org> Fixes: e625305b3907 ("percpu-refcount: make percpu_ref based on longs instead of ints") Cc: stable@vger.kernel.org # v3.18+
Diffstat (limited to 'include/trace/events/tlb.h')