/*
 * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
 *
 * Provides a framework for enqueueing and running callbacks from hardirq
 * context. The enqueueing is NMI-safe.
 */

#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/irqflags.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <asm/processor.h>


static DEFINE_PER_CPU(struct llist_head, raised_list);
static DEFINE_PER_CPU(struct llist_head, lazy_list);

/*
 * Claim the entry so that no one else will poke at it.
 */
static bool irq_work_claim(struct irq_work *work)
{
	unsigned long flags, oflags, nflags;

	/*
	 * Start with our best wish as a premise but only trust any
	 * flag value after cmpxchg() result.
	 */
	flags = work->flags & ~IRQ_WORK_PENDING;
	for (;;) {
		nflags = flags | IRQ_WORK_FLAGS;
		oflags = cmpxchg(&work->flags, flags, nflags);
		if (oflags == flags)
			break;
		if (oflags & IRQ_WORK_PENDING)
			return false;
		flags = oflags;
		cpu_relax();
	}

	return true;
}

void __weak arch_irq_work_raise(void)
{
	/*
	 * Lame architectures will get the timer tick callback
	 */
}

#ifdef CONFIG_SMP
/*
 * Enqueue the irq_work @work on @cpu unless it's already pending
 * somewhere.
 *
 * Can be re-enqueued while the callback is still in progress.
 */
bool irq_work_queue_on(struct irq_work *work, int cpu)
{
	/* All work should have been flushed before going offline */
	WARN_ON_ONCE(cpu_is_offline(cpu));

	/* Arch remote IPI send/receive backend aren't NMI safe */
	WARN_ON_ONCE(in_nmi());

	/* Only queue if not already pending */
	if (!irq_work_claim(work))
		return false;

	if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
		arch_send_call_function_single_ipi(cpu);

	return true;
}
EXPORT_SYMBOL_GPL(irq_work_queue_on);
#endif

/* Enqueue the irq work @work on the current CPU */
bool irq_work_queue(struct irq_work *work)
{
	/* Only queue if not already pending */
	if (!irq_work_claim(work))
		return false;

	/* Queue the entry and raise the IPI if needed. */
	preempt_disable();

	/* If the work is "lazy", handle it from next tick if any */
	if (work->flags & IRQ_WORK_LAZY) {
		if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) &&
		    tick_nohz_tick_stopped())
			arch_irq_work_raise();
	} else {
		if (llist_add(&work->llnode, this_cpu_ptr(&raised_list)))
			arch_irq_work_raise();
	}

	preempt_enable();

	return true;
}
EXPORT_SYMBOL_GPL(irq_work_queue);

bool irq_work_needs_cpu(void)
{
	struct llist_head *raised, *lazy;

	raised = this_cpu_ptr(&raised_list);
	lazy = this_cpu_ptr(&lazy_list);

	if (llist_empty(raised) || arch_irq_work_has_interrupt())
		if (llist_empty(lazy))
			return false;

	/* All work should have been flushed before going offline */
	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));

	return true;
}

static void irq_work_run_list(struct llist_head *list)
{
	unsigned long flags;
	struct irq_work *work;
	struct llist_node *llnode;

	BUG_ON(!irqs_disabled());

	if (llist_empty(list))
		return;

	llnode = llist_del_all(list);
	while (llnode != NULL) {
		work = llist_entry(llnode, struct irq_work, llnode);

		llnode = llist_next(llnode);

		/*
		 * Clear the PENDING bit, after this point the @work
		 * can be re-used.
		 * Make it immediately visible so that other CPUs trying
		 * to claim that work don't rely on us to handle their data
		 * while we are in the middle of the func.
		 */
		flags = work->flags & ~IRQ_WORK_PENDING;
		xchg(&work->flags, flags);

		work->func(work);
		/*
		 * Clear the BUSY bit and return to the free state if
		 * no-one else claimed it meanwhile.
		 */
		(void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
	}
}

/*
 * hotplug calls this through:
 *  hotplug_cfd() -> flush_smp_call_function_queue()
 */
void irq_work_run(void)
{
	irq_work_run_list(this_cpu_ptr(&raised_list));
	irq_work_run_list(this_cpu_ptr(&lazy_list));
}
EXPORT_SYMBOL_GPL(irq_work_run);

void irq_work_tick(void)
{
	struct llist_head *raised = this_cpu_ptr(&raised_list);

	if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
		irq_work_run_list(raised);
	irq_work_run_list(this_cpu_ptr(&lazy_list));
}

/*
 * Synchronize against the irq_work @entry, ensures the entry is not
 * currently in use.
 */
void irq_work_sync(struct irq_work *work)
{
	WARN_ON_ONCE(irqs_disabled());

	while (work->flags & IRQ_WORK_BUSY)
		cpu_relax();
}
EXPORT_SYMBOL_GPL(irq_work_sync);
<noscript><input type='submit' value='reload'/></noscript></td></tr></table></form></div><table summary='commit info' class='commit-info'>
<tr><th>author</th><td>Thomas Gleixner &lt;tglx@linutronix.de&gt;</td><td class='right'>2017-01-31 09:37:34 +0100</td></tr>
<tr><th>committer</th><td>Thomas Gleixner &lt;tglx@linutronix.de&gt;</td><td class='right'>2017-01-31 21:47:58 +0100</td></tr>
<tr><th>commit</th><td colspan='2' class='oid'><a href='/cgit.cgi/linux/net-next.git/commit/kernel/rcu?h=nds-private-remove&amp;id=0becc0ae5b42828785b589f686725ff5bc3b9b25'>0becc0ae5b42828785b589f686725ff5bc3b9b25</a> (<a href='/cgit.cgi/linux/net-next.git/patch/kernel/rcu?id=0becc0ae5b42828785b589f686725ff5bc3b9b25'>patch</a>)</td></tr>
<tr><th>tree</th><td colspan='2' class='oid'><a href='/cgit.cgi/linux/net-next.git/tree/?h=nds-private-remove&amp;id=0becc0ae5b42828785b589f686725ff5bc3b9b25'>be6d0e1f37c38ed0a7dd5da2d4b1e93f0fb43101</a> /<a href='/cgit.cgi/linux/net-next.git/tree/kernel/rcu?h=nds-private-remove&amp;id=0becc0ae5b42828785b589f686725ff5bc3b9b25'>kernel/rcu</a></td></tr>
<tr><th>parent</th><td colspan='2' class='oid'><a href='/cgit.cgi/linux/net-next.git/commit/kernel/rcu?h=nds-private-remove&amp;id=24c2503255d35c269b67162c397a1a1c1e02f6ce'>24c2503255d35c269b67162c397a1a1c1e02f6ce</a> (<a href='/cgit.cgi/linux/net-next.git/diff/kernel/rcu?h=nds-private-remove&amp;id=0becc0ae5b42828785b589f686725ff5bc3b9b25&amp;id2=24c2503255d35c269b67162c397a1a1c1e02f6ce'>diff</a>)</td></tr></table>
<div class='commit-subject'>x86/mce: Make timer handling more robust</div><div class='commit-msg'>Erik reported that on a preproduction hardware a CMCI storm triggers the
BUG_ON in add_timer_on(). The reason is that the per CPU MCE timer is
started by the CMCI logic before the MCE CPU hotplug callback starts the
timer with add_timer_on(). So the timer is already queued which triggers
the BUG.

Using add_timer_on() is pretty pointless in this code because the timer is
strictlty per CPU, initialized as pinned and all operations which arm the
timer happen on the CPU to which the timer belongs.

Simplify the whole machinery by using mod_timer() instead of add_timer_on()
which avoids the problem because mod_timer() can handle already queued
timers. Use __start_timer() everywhere so the earliest armed expiry time is
preserved.

Reported-by: Erik Veijola &lt;erik.veijola@intel.com&gt;
Tested-by: Borislav Petkov &lt;bp@alien8.de&gt;
Signed-off-by: Thomas Gleixner &lt;tglx@linutronix.de&gt;
Reviewed-by: Borislav Petkov &lt;bp@alien8.de&gt;
Cc: Tony Luck &lt;tony.luck@intel.com&gt;
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1701310936080.3457@nanos
Signed-off-by: Thomas Gleixner &lt;tglx@linutronix.de&gt;

</div><div class='diffstat-header'><a href='/cgit.cgi/linux/net-next.git/diff/?h=nds-private-remove&amp;id=0becc0ae5b42828785b589f686725ff5bc3b9b25'>Diffstat</a> (limited to 'kernel/rcu')</div><table summary='diffstat' class='diffstat'>