/*
 * The struct perf_event_attr test support.
 *
 * This test is embedded inside into perf directly and is governed
 * by the PERF_TEST_ATTR environment variable and hook inside
 * sys_perf_event_open function.
 *
 * The general idea is to store 'struct perf_event_attr' details for
 * each event created within single perf command. Each event details
 * are stored into separate text file. Once perf command is finished
 * these files can be checked for values we expect for command.
 *
 * Besides 'struct perf_event_attr' values we also store 'fd' and
 * 'group_fd' values to allow checking for groups created.
 *
 * This all is triggered by setting PERF_TEST_ATTR environment variable.
 * It must contain name of existing directory with access and write
 * permissions. All the event text files are stored there.
 */

#include <stdlib.h>
#include <stdio.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include "../perf.h"
#include "util.h"
#include <subcmd/exec-cmd.h>
#include "tests.h"

#define ENV "PERF_TEST_ATTR"

extern int verbose;

static char *dir;

void test_attr__init(void)
{
	dir = getenv(ENV);
	test_attr__enabled = (dir != NULL);
}

#define BUFSIZE 1024

#define __WRITE_ASS(str, fmt, data)					\
do {									\
	char buf[BUFSIZE];						\
	size_t size;							\
									\
	size = snprintf(buf, BUFSIZE, #str "=%"fmt "\n", data);		\
	if (1 != fwrite(buf, size, 1, file)) {				\
		perror("test attr - failed to write event file");	\
		fclose(file);						\
		return -1;						\
	}								\
									\
} while (0)

#define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field)

static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
		       int fd, int group_fd, unsigned long flags)
{
	FILE *file;
	char path[PATH_MAX];

	snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir,
		 attr->type, attr->config, fd);

	file = fopen(path, "w+");
	if (!file) {
		perror("test attr - failed to open event file");
		return -1;
	}

	if (fprintf(file, "[event-%d-%llu-%d]\n",
		    attr->type, attr->config, fd) < 0) {
		perror("test attr - failed to write event file");
		fclose(file);
		return -1;
	}

	/* syscall arguments */
	__WRITE_ASS(fd,       "d", fd);
	__WRITE_ASS(group_fd, "d", group_fd);
	__WRITE_ASS(cpu,      "d", cpu);
	__WRITE_ASS(pid,      "d", pid);
	__WRITE_ASS(flags,   "lu", flags);

	/* struct perf_event_attr */
	WRITE_ASS(type,   PRIu32);
	WRITE_ASS(size,   PRIu32);
	WRITE_ASS(config,  "llu");
	WRITE_ASS(sample_period, "llu");
	WRITE_ASS(sample_type,   "llu");
	WRITE_ASS(read_format,   "llu");
	WRITE_ASS(disabled,       "d");
	WRITE_ASS(inherit,        "d");
	WRITE_ASS(pinned,         "d");
	WRITE_ASS(exclusive,      "d");
	WRITE_ASS(exclude_user,   "d");
	WRITE_ASS(exclude_kernel, "d");
	WRITE_ASS(exclude_hv,     "d");
	WRITE_ASS(exclude_idle,   "d");
	WRITE_ASS(mmap,           "d");
	WRITE_ASS(comm,           "d");
	WRITE_ASS(freq,           "d");
	WRITE_ASS(inherit_stat,   "d");
	WRITE_ASS(enable_on_exec, "d");
	WRITE_ASS(task,           "d");
	WRITE_ASS(watermark,      "d");
	WRITE_ASS(precise_ip,     "d");
	WRITE_ASS(mmap_data,      "d");
	WRITE_ASS(sample_id_all,  "d");
	WRITE_ASS(exclude_host,   "d");
	WRITE_ASS(exclude_guest,  "d");
	WRITE_ASS(exclude_callchain_kernel, "d");
	WRITE_ASS(exclude_callchain_user, "d");
	WRITE_ASS(wakeup_events, PRIu32);
	WRITE_ASS(bp_type, PRIu32);
	WRITE_ASS(config1, "llu");
	WRITE_ASS(config2, "llu");
	WRITE_ASS(branch_sample_type, "llu");
	WRITE_ASS(sample_regs_user,   "llu");
	WRITE_ASS(sample_stack_user,  PRIu32);

	fclose(file);
	return 0;
}

void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
		     int fd, int group_fd, unsigned long flags)
{
	int errno_saved = errno;

	if (store_event(attr, pid, cpu, fd, group_fd, flags))
		die("test attr FAILED");

	errno = errno_saved;
}

static int run_dir(const char *d, const char *perf)
{
	char v[] = "-vvvvv";
	int vcnt = min(verbose, (int) sizeof(v) - 1);
	char cmd[3*PATH_MAX];

	if (verbose)
		vcnt++;

	snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s",
		 d, d, perf, vcnt, v);

	return system(cmd);
}

int test__attr(int subtest __maybe_unused)
{
	struct stat st;
	char path_perf[PATH_MAX];
	char path_dir[PATH_MAX];

	/* First try developement tree tests. */
	if (!lstat("./tests", &st))
		return run_dir("./tests", "./perf");

	/* Then installed path. */
	snprintf(path_dir,  PATH_MAX, "%s/tests", get_argv_exec_path());
	snprintf(path_perf, PATH_MAX, "%s/perf", BINDIR);

	if (!lstat(path_dir, &st) &&
	    !lstat(path_perf, &st))
		return run_dir(path_dir, path_perf);

	return TEST_SKIP;
}
th><td>Linus Torvalds &lt;torvalds@linux-foundation.org&gt;</td><td class='right'>2017-01-24 16:26:14 -0800</td></tr>
<tr><th>commit</th><td colspan='2' class='oid'><a href='/cgit.cgi/linux/net-next.git/commit/tools/perf/util?id=ea57485af8f4221312a5a95d63c382b45e7840dc'>ea57485af8f4221312a5a95d63c382b45e7840dc</a> (<a href='/cgit.cgi/linux/net-next.git/patch/tools/perf/util?id=ea57485af8f4221312a5a95d63c382b45e7840dc'>patch</a>)</td></tr>
<tr><th>tree</th><td colspan='2' class='oid'><a href='/cgit.cgi/linux/net-next.git/tree/?id=ea57485af8f4221312a5a95d63c382b45e7840dc'>11dffacd4921729f9009bdb87ab9782efbde81b6</a> /<a href='/cgit.cgi/linux/net-next.git/tree/tools/perf/util?id=ea57485af8f4221312a5a95d63c382b45e7840dc'>tools/perf/util</a></td></tr>
<tr><th>parent</th><td colspan='2' class='oid'><a href='/cgit.cgi/linux/net-next.git/commit/tools/perf/util?id=ff7a28a074ccbea999dadbb58c46212cf90984c6'>ff7a28a074ccbea999dadbb58c46212cf90984c6</a> (<a href='/cgit.cgi/linux/net-next.git/diff/tools/perf/util?id=ea57485af8f4221312a5a95d63c382b45e7840dc&amp;id2=ff7a28a074ccbea999dadbb58c46212cf90984c6'>diff</a>)</td></tr></table>
<div class='commit-subject'>mm, page_alloc: fix check for NULL preferred_zone</div><div class='commit-msg'>Patch series "fix premature OOM regression in 4.7+ due to cpuset races".

This is v2 of my attempt to fix the recent report based on LTP cpuset
stress test [1].  The intention is to go to stable 4.9 LTSS with this,
as triggering repeated OOMs is not nice.  That's why the patches try to
be not too intrusive.

Unfortunately why investigating I found that modifying the testcase to
use per-VMA policies instead of per-task policies will bring the OOM's
back, but that seems to be much older and harder to fix problem.  I have
posted a RFC [2] but I believe that fixing the recent regressions has a
higher priority.

Longer-term we might try to think how to fix the cpuset mess in a better
and less error prone way.  I was for example very surprised to learn,
that cpuset updates change not only task-&gt;mems_allowed, but also
nodemask of mempolicies.  Until now I expected the parameter to
alloc_pages_nodemask() to be stable.  I wonder why do we then treat
cpusets specially in get_page_from_freelist() and distinguish HARDWALL
etc, when there's unconditional intersection between mempolicy and
cpuset.  I would expect the nodemask adjustment for saving overhead in
g_p_f(), but that clearly doesn't happen in the current form.  So we
have both crazy complexity and overhead, AFAICS.

[1] https://lkml.kernel.org/r/CAFpQJXUq-JuEP=QPidy4p_=FN0rkH5Z-kfB4qBvsf6jMS87Edg@mail.gmail.com
[2] https://lkml.kernel.org/r/7c459f26-13a6-a817-e508-b65b903a8378@suse.cz

This patch (of 4):

Since commit c33d6c06f60f ("mm, page_alloc: avoid looking up the first
zone in a zonelist twice") we have a wrong check for NULL preferred_zone,
which can theoretically happen due to concurrent cpuset modification.  We
check the zoneref pointer which is never NULL and we should check the zone
pointer.  Also document this in first_zones_zonelist() comment per Michal
Hocko.

Fixes: c33d6c06f60f ("mm, page_alloc: avoid looking up the first zone in a zonelist twice")
Link: http://lkml.kernel.org/r/20170120103843.24587-2-vbabka@suse.cz
Signed-off-by: Vlastimil Babka &lt;vbabka@suse.cz&gt;
Acked-by: Mel Gorman &lt;mgorman@techsingularity.net&gt;
Acked-by: Hillf Danton &lt;hillf.zj@alibaba-inc.com&gt;
Cc: Ganapatrao Kulkarni &lt;gpkulkarni@gmail.com&gt;
Cc: Michal Hocko &lt;mhocko@suse.com&gt;
Cc: &lt;stable@vger.kernel.org&gt;
Signed-off-by: Andrew Morton &lt;akpm@linux-foundation.org&gt;
Signed-off-by: Linus Torvalds &lt;torvalds@linux-foundation.org&gt;
</div><div class='diffstat-header'><a href='/cgit.cgi/linux/net-next.git/diff/?id=ea57485af8f4221312a5a95d63c382b45e7840dc'>Diffstat</a> (limited to 'tools/perf/util')</div><table summary='diffstat' class='diffstat'>