/*
 *  linux/fs/ext4/fsync.c
 *
 *  Copyright (C) 1993  Stephen Tweedie (sct@redhat.com)
 *  from
 *  Copyright (C) 1992  Remy Card (card@masi.ibp.fr)
 *                      Laboratoire MASI - Institut Blaise Pascal
 *                      Universite Pierre et Marie Curie (Paris VI)
 *  from
 *  linux/fs/minix/truncate.c   Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  ext4fs fsync primitive
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 *
 *  Removed unnecessary code duplication for little endian machines
 *  and excessive __inline__s.
 *        Andi Kleen, 1997
 *
 * Major simplications and cleanup - we only need to do the metadata, because
 * we can depend on generic_block_fdatasync() to sync the data blocks.
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>

#include "ext4.h"
#include "ext4_jbd2.h"

#include <trace/events/ext4.h>

/*
 * If we're not journaling and this is a just-created file, we have to
 * sync our parent directory (if it was freshly created) since
 * otherwise it will only be written by writeback, leaving a huge
 * window during which a crash may lose the file.  This may apply for
 * the parent directory's parent as well, and so on recursively, if
 * they are also freshly created.
 */
static int ext4_sync_parent(struct inode *inode)
{
	struct dentry *dentry = NULL;
	struct inode *next;
	int ret = 0;

	if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY))
		return 0;
	inode = igrab(inode);
	while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
		ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
		dentry = d_find_any_alias(inode);
		if (!dentry)
			break;
		next = igrab(d_inode(dentry->d_parent));
		dput(dentry);
		if (!next)
			break;
		iput(inode);
		inode = next;
		/*
		 * The directory inode may have gone through rmdir by now. But
		 * the inode itself and its blocks are still allocated (we hold
		 * a reference to the inode so it didn't go through
		 * ext4_evict_inode()) and so we are safe to flush metadata
		 * blocks and the inode.
		 */
		ret = sync_mapping_buffers(inode->i_mapping);
		if (ret)
			break;
		ret = sync_inode_metadata(inode, 1);
		if (ret)
			break;
	}
	iput(inode);
	return ret;
}

/*
 * akpm: A new design for ext4_sync_file().
 *
 * This is only called from sys_fsync(), sys_fdatasync() and sys_msync().
 * There cannot be a transaction open by this task.
 * Another task could have dirtied this inode.  Its data can be in any
 * state in the journalling system.
 *
 * What we do is just kick off a commit and wait on it.  This will snapshot the
 * inode to disk.
 */

int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file->f_mapping->host;
	struct ext4_inode_info *ei = EXT4_I(inode);
	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
	int ret = 0, err;
	tid_t commit_tid;
	bool needs_barrier = false;

	J_ASSERT(ext4_journal_current_handle() == NULL);

	trace_ext4_sync_file_enter(file, datasync);

	if (inode->i_sb->s_flags & MS_RDONLY) {
		/* Make sure that we read updated s_mount_flags value */
		smp_rmb();
		if (EXT4_SB(inode->i_sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
			ret = -EROFS;
		goto out;
	}

	if (!journal) {
		ret = __generic_file_fsync(file, start, end, datasync);
		if (!ret)
			ret = ext4_sync_parent(inode);
		if (test_opt(inode->i_sb, BARRIER))
			goto issue_flush;
		goto out;
	}

	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (ret)
		return ret;
	/*
	 * data=writeback,ordered:
	 *  The caller's filemap_fdatawrite()/wait will sync the data.
	 *  Metadata is in the journal, we wait for proper transaction to
	 *  commit here.
	 *
	 * data=journal:
	 *  filemap_fdatawrite won't do anything (the buffers are clean).
	 *  ext4_force_commit will write the file data into the journal and
	 *  will wait on that.
	 *  filemap_fdatawait() will encounter a ton of newly-dirtied pages
	 *  (they were dirtied by commit).  But that's OK - the blocks are
	 *  safe in-journal, which is all fsync() needs to ensure.
	 */
	if (ext4_should_journal_data(inode)) {
		ret = ext4_force_commit(inode->i_sb);
		goto out;
	}

	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
	if (journal->j_flags & JBD2_BARRIER &&
	    !jbd2_trans_will_send_data_barrier(journal, commit_tid))
		needs_barrier = true;
	ret = jbd2_complete_transaction(journal, commit_tid);
	if (needs_barrier) {
	issue_flush:
		err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
		if (!ret)
			ret = err;
	}
out:
	trace_ext4_sync_file_exit(inode, ret);
	return ret;
}
><noscript><input type='submit' value='reload'/></noscript></td></tr></table></form></div><table summary='commit info' class='commit-info'>
<tr><th>author</th><td>Thomas Gleixner &lt;tglx@linutronix.de&gt;</td><td class='right'>2017-01-31 09:37:34 +0100</td></tr>
<tr><th>committer</th><td>Thomas Gleixner &lt;tglx@linutronix.de&gt;</td><td class='right'>2017-01-31 21:47:58 +0100</td></tr>
<tr><th>commit</th><td colspan='2' class='oid'><a href='/cgit.cgi/linux/net-next.git/commit/include/uapi/scsi/fc/fc_gs.h?id=0becc0ae5b42828785b589f686725ff5bc3b9b25'>0becc0ae5b42828785b589f686725ff5bc3b9b25</a> (<a href='/cgit.cgi/linux/net-next.git/patch/include/uapi/scsi/fc/fc_gs.h?id=0becc0ae5b42828785b589f686725ff5bc3b9b25'>patch</a>)</td></tr>
<tr><th>tree</th><td colspan='2' class='oid'><a href='/cgit.cgi/linux/net-next.git/tree/?id=0becc0ae5b42828785b589f686725ff5bc3b9b25'>be6d0e1f37c38ed0a7dd5da2d4b1e93f0fb43101</a> /<a href='/cgit.cgi/linux/net-next.git/tree/include/uapi/scsi/fc/fc_gs.h?id=0becc0ae5b42828785b589f686725ff5bc3b9b25'>include/uapi/scsi/fc/fc_gs.h</a></td></tr>
<tr><th>parent</th><td colspan='2' class='oid'><a href='/cgit.cgi/linux/net-next.git/commit/include/uapi/scsi/fc/fc_gs.h?id=24c2503255d35c269b67162c397a1a1c1e02f6ce'>24c2503255d35c269b67162c397a1a1c1e02f6ce</a> (<a href='/cgit.cgi/linux/net-next.git/diff/include/uapi/scsi/fc/fc_gs.h?id=0becc0ae5b42828785b589f686725ff5bc3b9b25&amp;id2=24c2503255d35c269b67162c397a1a1c1e02f6ce'>diff</a>)</td></tr></table>
<div class='commit-subject'>x86/mce: Make timer handling more robust</div><div class='commit-msg'>Erik reported that on a preproduction hardware a CMCI storm triggers the
BUG_ON in add_timer_on(). The reason is that the per CPU MCE timer is
started by the CMCI logic before the MCE CPU hotplug callback starts the
timer with add_timer_on(). So the timer is already queued which triggers
the BUG.

Using add_timer_on() is pretty pointless in this code because the timer is
strictlty per CPU, initialized as pinned and all operations which arm the
timer happen on the CPU to which the timer belongs.

Simplify the whole machinery by using mod_timer() instead of add_timer_on()
which avoids the problem because mod_timer() can handle already queued
timers. Use __start_timer() everywhere so the earliest armed expiry time is
preserved.

Reported-by: Erik Veijola &lt;erik.veijola@intel.com&gt;
Tested-by: Borislav Petkov &lt;bp@alien8.de&gt;
Signed-off-by: Thomas Gleixner &lt;tglx@linutronix.de&gt;
Reviewed-by: Borislav Petkov &lt;bp@alien8.de&gt;
Cc: Tony Luck &lt;tony.luck@intel.com&gt;
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1701310936080.3457@nanos
Signed-off-by: Thomas Gleixner &lt;tglx@linutronix.de&gt;

</div><div class='diffstat-header'><a href='/cgit.cgi/linux/net-next.git/diff/?id=0becc0ae5b42828785b589f686725ff5bc3b9b25'>Diffstat</a> (limited to 'include/uapi/scsi/fc/fc_gs.h')</div><table summary='diffstat' class='diffstat'>