// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2018 Roman Penyaev
* Copyright (C) 2019 David Howells <dhowells@redhat.com>
*
* The purpose of the tool is to generate N events from different threads and to
* measure how fast those events are delivered to the thread that does epoll.
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <time.h>
#include <assert.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/eventfd.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#include <errno.h>
#include <err.h>
#include <numa.h>
#include <poll.h>
#include <linux/unistd.h>
#include <linux/watch_queue.h>
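/*
* linux/eventpoll.h is needed for the user-poll definitions, but it also
* defines struct epoll_event and the EPOLL* constants, which would clash with
* glibc's sys/epoll.h.  Rename the struct and undefine the constants so that
* both headers can be included.
*/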
#define epoll_event xxx_epoll_event
#include <linux/eventpoll.h>
#undef epoll_event
#undef EPOLL_CLOEXEC
#undef EPOLLIN
#undef EPOLLPRI
#undef EPOLLOUT
#undef EPOLLERR
#undef EPOLLHUP
#undef EPOLLNVAL
#undef EPOLLRDNORM
#undef EPOLLRDBAND
#undef EPOLLWRNORM
#undef EPOLLWRBAND
#undef EPOLLMSG
#undef EPOLLRDHUP
#undef EPOLLEXCLUSIVE
#undef EPOLLWAKEUP
#undef EPOLLONESHOT
#undef EPOLLET
#include <sys/epoll.h>
#define BUILD_BUG_ON(condition) ((void)sizeof(char [1 - 2*!!(condition)]))
#define READ_ONCE(v) __atomic_load_n(&(v), __ATOMIC_RELAXED)
#define ITERS 1000000ull
#ifndef __NR_epoll_create2
#define __NR_epoll_create2 -1
#endif
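/*
* Thin wrapper for the epoll_create2() syscall.  Judging by the caller below,
* it takes the epoll flags (EPOLL_USERPOLL), the number of user items to
* allocate and a watch queue fd on which readiness notifications are posted.
*/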
static inline long epoll_create2(int flags, size_t size, int watch_fd)
{
return syscall(__NR_epoll_create2, flags, size, watch_fd);
}
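/* Per-producer state: the thread handle and the eventfd it signals. */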
struct thread_ctx {
pthread_t thread;
int efd;
};
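/* Set of online CPUs that the producer threads are pinned to. */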
struct cpu_map {
unsigned int nr;
unsigned int map[];
};
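/*
* Synchronisation with the producers: thr_ready counts threads that have
* started up, start releases them all at once.
*/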
static volatile unsigned int thr_ready;
static volatile unsigned int start;
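/*
* Check sysfs to see whether a CPU is online.  CPUs without an 'online'
* attribute (such as cpu0 on many systems) are assumed to be online.
*/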
static int is_cpu_online(int cpu)
{
char buf[64];
char online;
FILE *f;
int rc;
snprintf(buf, sizeof(buf), "/sys/devices/system/cpu/cpu%d/online", cpu);
f = fopen(buf, "r");
if (!f)
return 1;
rc = fread(&online, 1, 1, f);
assert(rc == 1);
fclose(f);
return (char)online == '1';
}
static struct cpu_map *cpu_map__new(void)
{
struct cpu_map *cpu;
struct bitmask *bm;
int i, bit, cpus_nr;
cpus_nr = numa_num_possible_cpus();
cpu = calloc(1, sizeof(*cpu) + sizeof(cpu->map[0]) * cpus_nr);
if (!cpu)
return NULL;
bm = numa_all_cpus_ptr;
assert(bm);
for (bit = 0, i = 0; bit < bm->size; bit++) {
if (numa_bitmask_isbitset(bm, bit) && is_cpu_online(bit)) {
cpu->map[i++] = bit;
}
}
cpu->nr = i;
return cpu;
}
static void cpu_map__put(struct cpu_map *cpu)
{
free(cpu);
}
static inline unsigned long long nsecs(void)
{
struct timespec ts = {0, 0};
clock_gettime(CLOCK_MONOTONIC, &ts);
return ((unsigned long long)ts.tv_sec * 1000000000ull) + ts.tv_nsec;
}
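/*
* Producer thread: wait for the start signal, then post ITERS events by
* writing to its eventfd.
*/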
static void *thread_work(void *arg)
{
struct thread_ctx *ctx = arg;
__u64 ucnt = 1;
unsigned int i;
int rc;
__atomic_add_fetch(&thr_ready, 1, __ATOMIC_RELAXED);
while (!start)
;
for (i = 0; i < ITERS; i++) {
rc = write(ctx->efd, &ucnt, sizeof(ucnt));
assert(rc == sizeof(ucnt));
}
return NULL;
}
/*
* Process an event.
*/
static __attribute__((noinline))
void read_event(struct epoll_uheader *header, unsigned int idx,
struct epoll_event *event)
{
struct epoll_uitem *item = &header->items[idx];
assert(idx < header->max_items_nr); /* Corrupted index? */
/*
* Fetch the data first, then atomically consume (clear) the ready events
* for this item.
*/
event->data.u64 = item->data;
event->events = __atomic_exchange_n(&item->ready_events, 0,
__ATOMIC_RELEASE);
assert(event->events & ~EPOLLREMOVED);
}
/*
* Consume watch notifications, looking for EPOLL events.
*/
static int watch_queue_consumer(int wfd, struct watch_queue_buffer *buf,
struct epoll_uheader *header,
struct epoll_event *events,
bool can_sleep)
{
struct watch_notification *n;
struct epoll_notification *en;
unsigned int len, head, tail, mask = buf->meta.mask;
unsigned int epoll_slot;
int nfds = 0;
/* 'tail' belongs to us and is where events are consumed from */
tail = buf->meta.tail;
/* 'head' belongs to the kernel and is where events are inserted. */
if (can_sleep) {
head = __atomic_load_n(&buf->meta.head, __ATOMIC_ACQUIRE);
if (tail == head) {
struct pollfd p[1];
p[0].fd = wfd;
p[0].events = POLLIN | POLLERR;
p[0].revents = 0;
if (poll(p, 1, -1) == -1)
err(EXIT_FAILURE, "wq/poll");
}
}
#if 0
printf("ptrs h=%x t=%x m=%x\n",
buf->meta.head, buf->meta.tail, buf->meta.mask);
#endif
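/* Re-read the producer's head pointer; slots in [tail, head) are ready. */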
head = __atomic_load_n(&buf->meta.head, __ATOMIC_ACQUIRE);
while (tail != head) {
n = &buf->slots[tail & mask];
#if 0
printf("NOTIFY[%08x-%08x] ty=%04x sy=%04x i=%08x\n",
head, tail, n->type, n->subtype, n->info);
#endif
if (buf->meta.watch.info & WATCH_INFO_NOTIFICATIONS_LOST)
printf("[!] notifications lost\n");
len = (n->info & WATCH_INFO_LENGTH) >> WATCH_INFO_LENGTH__SHIFT;
assert(len > 0);
switch (n->type) {
case WATCH_TYPE_META:
#if 0
if (n->subtype == WATCH_META_REMOVAL_NOTIFICATION)
printf("REMOVAL of watchpoint %08x\n",
n->info & WATCH_INFO_ID);
#endif
/* Fall through */
default:
tail += len;
__atomic_store_n(&buf->meta.tail, tail, __ATOMIC_RELEASE);
break;
case WATCH_TYPE_EPOLL_NOTIFY:
en = (struct epoll_notification *)n;
epoll_slot = en->watch.info & WATCH_INFO_TYPE_INFO;
epoll_slot >>= WATCH_INFO_TYPE_INFO__SHIFT;
/* Consume the slot before we clear the events */
tail += len;
__atomic_store_n(&buf->meta.tail, tail, __ATOMIC_RELEASE);
read_event(header, epoll_slot, &events[nfds]);
nfds++;
break;
}
}
return nfds;
}
/*
* Map the epoll descriptor table from the kernel into our address space.
*/
static void uepoll_mmap(int epfd, struct epoll_uheader **_header, size_t *_mapping_size)
{
struct epoll_uheader *header;
unsigned int len;
BUILD_BUG_ON(sizeof(*header) != EPOLL_USERPOLL_HEADER_SIZE);
BUILD_BUG_ON(sizeof(header->items[0]) != 16);
len = sysconf(_SC_PAGESIZE);
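/*
* Map a single page first; if the kernel reports a larger header length,
* remap with the full size.
*/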
again:
header = mmap(NULL, len, PROT_WRITE|PROT_READ, MAP_SHARED, epfd, 0);
if (header == MAP_FAILED)
err(EXIT_FAILURE, "mmap(header)");
if (header->header_length != len) {
unsigned int tmp_len = len;
len = header->header_length;
munmap(header, tmp_len);
goto again;
}
assert(header->magic == EPOLL_USERPOLL_HEADER_MAGIC);
*_header = header;
*_mapping_size = len;
}
/*
* Create a watch queue of buffer_size pages and map its notification buffer.
*/
static int create_watch_queue(unsigned int buffer_size,
struct watch_queue_buffer **_watch_queue)
{
struct watch_queue_buffer *buf;
size_t page_size;
int wfd;
wfd = open("/dev/watch_queue", O_RDWR);
if (wfd == -1)
err(EXIT_FAILURE, "/dev/watch_queue");
if (ioctl(wfd, IOC_WATCH_QUEUE_SET_SIZE, buffer_size) == -1)
err(EXIT_FAILURE, "wq/size");
page_size = sysconf(_SC_PAGESIZE);
buf = mmap(NULL, buffer_size * page_size,
PROT_READ | PROT_WRITE, MAP_SHARED, wfd, 0);
if (buf == MAP_FAILED)
err(EXIT_FAILURE, "wq/mmap");
*_watch_queue = buf;
return wfd;
}
static int do_bench(struct cpu_map *cpu, unsigned int nthreads,
int wfd, struct watch_queue_buffer *watch_queue)
{
struct epoll_event ev, events[nthreads];
struct thread_ctx threads[nthreads];
pthread_attr_t thrattr;
struct thread_ctx *ctx;
size_t mapping_size;
int rc, epfd, nfds;
cpu_set_t cpuset;
unsigned int i;
struct epoll_uheader *header;
unsigned long long epoll_calls = 0, epoll_nsecs;
unsigned long long ucnt, ucnt_sum = 0;
thr_ready = 0;
start = 0;
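/*
* Create a user-polled epoll instance with room for nthreads items,
* attached to the watch queue so that readiness is signalled there.
*/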
epfd = epoll_create2(EPOLL_USERPOLL, nthreads, wfd);
if (epfd < 0)
err(EXIT_FAILURE, "epoll_create2");
for (i = 0; i < nthreads; i++) {
ctx = &threads[i];
ctx->efd = eventfd(0, EFD_NONBLOCK);
if (ctx->efd < 0)
err(EXIT_FAILURE, "eventfd");
ev.events = EPOLLIN | EPOLLET;
ev.data.ptr = ctx;
rc = epoll_ctl(epfd, EPOLL_CTL_ADD, ctx->efd, &ev);
if (rc)
err(EXIT_FAILURE, "epoll_ctl");
CPU_ZERO(&cpuset);
CPU_SET(cpu->map[i % cpu->nr], &cpuset);
pthread_attr_init(&thrattr);
rc = pthread_attr_setaffinity_np(&thrattr, sizeof(cpu_set_t),
&cpuset);
if (rc) {
errno = rc;
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
}
rc = pthread_create(&ctx->thread, NULL, thread_work, ctx);
if (rc) {
errno = rc;
err(EXIT_FAILURE, "pthread_create");
}
}
/* Map the user-poll header and item array */
uepoll_mmap(epfd, &header, &mapping_size);
while (thr_ready != nthreads)
;
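/* Drain any notifications queued while the eventfds were being added. */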
watch_queue_consumer(wfd, watch_queue, header, events, false);
/* Signal start for all threads */
start = 1;
epoll_nsecs = nsecs();
while (1) {
nfds = watch_queue_consumer(wfd, watch_queue, header, events,
true);
if (nfds < 0)
err(EXIT_FAILURE, "epoll_wait");
epoll_calls++;
for (i = 0; i < (unsigned int)nfds; ++i) {
ctx = events[i].data.ptr;
rc = read(ctx->efd, &ucnt, sizeof(ucnt));
if (rc < 0) {
assert(errno == EAGAIN);
continue;
}
assert(rc == sizeof(ucnt));
ucnt_sum += ucnt;
if (ucnt_sum == nthreads * ITERS)
goto end;
}
}
end:
epoll_nsecs = nsecs() - epoll_nsecs;
for (i = 0; i < nthreads; i++) {
ctx = &threads[i];
pthread_join(ctx->thread, NULL);
}
close(epfd);
watch_queue_consumer(wfd, watch_queue, header, events, false);
munmap(header, mapping_size);
printf("%7d %8lld %8lld\n",
nthreads,
ITERS * nthreads / (epoll_nsecs / 1000 / 1000),
epoll_nsecs / 1000 / 1000);
return 0;
}
int main(int argc, char *argv[])
{
static const unsigned int nthreads_arr[] = { 1, 8, 16, 32, 64, 128, 256 };
struct watch_queue_buffer *watch_queue;
struct cpu_map *cpu;
unsigned int i;
int wfd;
wfd = create_watch_queue(1, &watch_queue);
cpu = cpu_map__new();
if (!cpu) {
errno = ENOMEM;
err(EXIT_FAILURE, "cpu_map__new");
}
printf("threads events/ms run-time ms\n");
for (i = 0; i < sizeof(nthreads_arr) / sizeof(nthreads_arr[0]); i++)
do_bench(cpu, nthreads_arr[i], wfd, watch_queue);
cpu_map__put(cpu);
return 0;
}