eventfd是一个基于匿名inode(anon inode)文件实现的系统调用,用于高效的进程间事件通知。本文介绍eventfd的内核实现和用户态测试,方便后续编程时考虑使用eventfd。
eventfd是通过syscall实现,如下
/*
 * eventfd2 system call entry point: forwards the initial counter value
 * and the caller-supplied flags to do_eventfd().
 */
SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
{
	return do_eventfd(count, flags);
}

/*
 * eventfd system call entry point: same as eventfd2, except that the
 * flags argument passed to do_eventfd() is always 0.
 */
SYSCALL_DEFINE1(eventfd, unsigned int, count)
{
	return do_eventfd(count, 0);
}
其实现如下:
/*
 * Common implementation behind the eventfd and eventfd2 system calls.
 *
 * @count: initial value stored in the new context's counter.
 * @flags: EFD_* flags; any bit outside EFD_FLAGS_SET is rejected.
 *
 * Allocates an eventfd_ctx, reserves a file descriptor, creates an
 * anonymous-inode file bound to eventfd_fops with the context as its
 * private data, and installs that file into the descriptor.
 *
 * Returns the new file descriptor on success. On failure returns
 * -EINVAL (unknown flag bits), -ENOMEM (context allocation failed), the
 * negative value returned by get_unused_fd_flags(), or the error encoded
 * in the pointer returned by anon_inode_getfile().
 */
static int do_eventfd(unsigned int count, int flags)
{
	struct eventfd_ctx *ctx;
	struct file *file;
	int fd;

	/* Check the EFD_* constants for consistency. */
	BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
	BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);

	/* Reject any flag bit that is not part of EFD_FLAGS_SET. */
	if (flags & ~EFD_FLAGS_SET)
		return -EINVAL;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	/* Initialise every field of the freshly allocated context. */
	kref_init(&ctx->kref);
	init_waitqueue_head(&ctx->wqh);
	ctx->count = count;
	/* The full, unmasked flags are kept in the context. */
	ctx->flags = flags;
	/*
	 * NOTE(review): the result of ida_simple_get() is stored without
	 * being checked here; presumably eventfd_free_ctx() copes with a
	 * negative id -- confirm against its definition.
	 */
	ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL);

	/*
	 * From here on only the bits in EFD_SHARED_FCNTL_FLAGS, plus
	 * O_RDWR, are passed on as file/descriptor flags.
	 */
	flags &= EFD_SHARED_FCNTL_FLAGS;
	flags |= O_RDWR;
	fd = get_unused_fd_flags(flags);
	if (fd < 0)
		goto err;

	/* Create the "[eventfd]" file; ctx is handed over as its private data. */
	file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags);
	if (IS_ERR(file)) {
		/* Give the reserved descriptor back and report the file error. */
		put_unused_fd(fd);
		fd = PTR_ERR(file);
		goto err;
	}

	file->f_mode |= FMODE_NOWAIT;
	/* Bind the file to the reserved descriptor number. */
	fd_install(fd, file);
	return fd;

	/* Failure path: release the context; fd holds the negative error. */
err:
	eventfd_free_ctx(ctx);
	return fd;
}
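do_eventfd比较重要的点在于anon_inode_getfile:它基于匿名inode(而不是匿名页)创建一个名为"[eventfd]"的file,把eventfd_fops作为其文件操作集,并把ctx作为file的private_data,随后通过fd_install把该file安装到新分配的fd上。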
另一个重要的点是eventfd_ctx结构,如下:
/*
 * Per-eventfd state. One instance is allocated by do_eventfd() and
 * attached to the eventfd file as its private data.
 */
struct eventfd_ctx {
	/* Reference count, initialised with kref_init() in do_eventfd(). */
	struct kref kref;
	/*
	 * Wait queue used for wakeups; its lock (wqh.lock) is held while
	 * "count" is modified.
	 */
	wait_queue_head_t wqh;
	/*
	 * Every time that a write(2) is performed on an eventfd, the
	 * value of the __u64 being written is added to "count" and a
	 * wakeup is performed on "wqh". A read(2) will return the "count"
	 * value to userspace, and will reset "count" to zero. The kernel
	 * side eventfd_signal() also, adds to the "count" counter and
	 * issue a wakeup.
	 */
	__u64 count;
	/* EFD_* flags given at creation time (e.g. tested for EFD_SEMAPHORE). */
	unsigned int flags;
	/* Identifier obtained from eventfd_ida in do_eventfd(). */
	int id;
};
这里看到了我们read和write作用的是count值,所以这个fd只能通过count来传递信息。而read/write是通过标准的fops实现,如下
/*
 * File operations of an eventfd file. do_eventfd() passes this table to
 * anon_inode_getfile(), so these handlers serve read/write/poll/close on
 * the descriptor returned to userspace.
 */
static const struct file_operations eventfd_fops = {
	/* The fdinfo hook is only built when CONFIG_PROC_FS is enabled. */
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= eventfd_show_fdinfo,
#endif
	.release	= eventfd_release,
	.poll		= eventfd_poll,
	/* Reads go through the iterator-based hook, writes through .write. */
	.read_iter	= eventfd_read,
	.write		= eventfd_write,
	.llseek		= noop_llseek,
};
read操作的核心实现是eventfd_ctx_do_read:如果flags设置了EFD_SEMAPHORE且count不为0,则本次read只返回1,并把count减1;否则返回当前的count值,并把count清零。调用者必须已经持有ctx->wqh.lock,如下:
/*
 * Consume a value from the eventfd counter.
 *
 * @ctx: eventfd context to read from.
 * @cnt: out parameter receiving the value that was consumed.
 *
 * The caller must already hold ctx->wqh.lock (asserted via lockdep).
 * If EFD_SEMAPHORE is set and the counter is non-zero, *cnt is 1 and the
 * counter is decremented by one. Otherwise *cnt is the whole counter
 * value and the counter becomes zero.
 */
void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
{
	lockdep_assert_held(&ctx->wqh.lock);

	/* Semaphore mode hands out one unit at a time; else take everything. */
	*cnt = ((ctx->flags & EFD_SEMAPHORE) && ctx->count) ? 1 : ctx->count;
	ctx->count -= *cnt;
}
write操作直接在eventfd_write中实现,每次write并不是简单地加1,而是把用户写入的64位值ucnt累加到count上,也就是
ctx->count += ucnt
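poll操作先通过poll_wait把自己挂到ctx->wqh等待队列上,再用READ_ONCE对count做一次读取。根据内核注释,对count的写入都是在持有ctx->wqh.lock的情况下进行的,而poll_wait在加入等待队列时也会获取这把锁,因此即使在锁外读取count,也不会丢失唤醒。注意下面注释中的qwh是内核注释里wqh的笔误,两者指的是同一把锁,而不是两把不同的锁。下面这种执行顺序是可能出现的,并且是安全的: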
 *     poll                               write
 *     -----------------                  ------------
 *     lock ctx->wqh.lock (in poll_wait)
 *     count = ctx->count
 *     __add_wait_queue
 *     unlock ctx->wqh.lock
 *                                        lock ctx->qwh.lock
 *                                        ctx->count += n
 *                                        if (waitqueue_active)
 *                                          wake_up_locked_poll
 *                                        unlock ctx->qwh.lock
 *     eventfd_poll returns 0
其代码实现如下
/*
 * poll handler of an eventfd file.
 *
 * @file: the eventfd file; its private_data is the eventfd_ctx.
 * @wait: poll table supplied by the caller.
 *
 * Returns a mask built from the current counter value:
 *   POLLIN  - counter is greater than zero;
 *   POLLERR - counter equals ULLONG_MAX;
 *   POLLOUT - counter is below ULLONG_MAX - 1.
 */
static unsigned int eventfd_poll(struct file *file, poll_table *wait)
{
	struct eventfd_ctx *ctx = file->private_data;
	unsigned int events = 0;
	u64 count;

	/* Register on the context's wait queue before sampling the counter. */
	poll_wait(file, &ctx->wqh, wait);

	/*
	 * Single snapshot of the counter, taken without holding wqh.lock.
	 * NOTE(review): the read is deliberately placed after poll_wait();
	 * do not reorder these two statements.
	 */
	count = READ_ONCE(ctx->count);

	if (count > 0)
		events |= POLLIN;
	if (count == ULLONG_MAX)
		events |= POLLERR;
	if (ULLONG_MAX - 1 > count)
		events |= POLLOUT;

	return events;
}
而内核空间要触发eventfd时,可以调用eventfd_signal函数,其实现是eventfd_signal_mask。它同样是在持有ctx->wqh.lock的情况下把n累加到count上(如果会溢出,则n被截断为ULLONG_MAX - count),然后唤醒等待队列上的等待者,如下:
/*
 * Add to the eventfd counter from kernel context and wake up waiters.
 *
 * @ctx:  eventfd context to signal.
 * @n:    amount to add to the counter.
 * @mask: extra poll bits OR-ed with EPOLLIN in the wakeup key.
 *
 * Returns the amount actually added: n, or a smaller value when adding n
 * would push the counter past ULLONG_MAX. Returns 0 without touching the
 * counter when a signal is already in progress on this CPU.
 */
__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask)
{
	unsigned long flags;

	/*
	 * Deadlock or stack overflow issues can happen if we recurse here
	 * through waitqueue wakeup handlers. If the caller uses potentially
	 * nested waitqueues with custom wakeup handlers, then it should
	 * check eventfd_signal_count() before calling this function. If
	 * it returns true, the eventfd_signal() call should be deferred to a
	 * safe context.
	 */
	if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
		return 0;

	spin_lock_irqsave(&ctx->wqh.lock, flags);
	/* Mark this CPU as being inside a signal for the recursion check above. */
	this_cpu_inc(eventfd_wake_count);
	/* Clamp n so that the counter saturates at ULLONG_MAX instead of wrapping. */
	if (ULLONG_MAX - ctx->count < n)
		n = ULLONG_MAX - ctx->count;
	ctx->count += n;
	/* Only issue the wakeup when somebody is actually waiting. */
	if (waitqueue_active(&ctx->wqh))
		wake_up_locked_poll(&ctx->wqh, EPOLLIN | mask);
	this_cpu_dec(eventfd_wake_count);
	spin_unlock_irqrestore(&ctx->wqh.lock, flags);

	return n;
}
为了使用eventfd,我们可以直接使用c库封装的eventfd函数,示例如下:
#include <sys/eventfd.h>
#include <unistd.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Minimal eventfd demo.
 *
 * Creates an eventfd with an initial counter of 0 and no flags, writes
 * the value 1 to it twice, then reads the counter back once and prints
 * the value that read(2) returned.
 *
 * Returns 0 on success and 1 if any system call fails; the descriptor is
 * closed on every path after it has been created.
 */
int main(void)
{
    uint64_t value = 1;
    int status = 1;
    int efd;

    efd = eventfd(0, 0);
    if (efd == -1) {
        perror("eventfd");
        return 1;
    }

    /* First write: adds 1 to the counter. */
    if (write(efd, &value, sizeof(value)) == -1) {
        perror("write");
        goto out;
    }

    /* Second write: adds another 1 to the counter. */
    if (write(efd, &value, sizeof(value)) == -1) {
        perror("write");
        goto out;
    }

    /* A single read returns the accumulated counter value. */
    if (read(efd, &value, sizeof(value)) == -1) {
        perror("read");
        goto out;
    }

    /* PRIu64 is the matching conversion for uint64_t on every platform. */
    printf("[kylin]: read value: %" PRIu64 "\n", value);
    status = 0;

out:
    close(efd);
    return status;
}
运行后结果如下:
[kylin]: read value: 2
至此eventfd介绍完成,相信大家在需要高性能的进程间通信时,可以适当考虑使用eventfd。