Browse Source

[LibOS, Pal] Support eventfd()

This commit adds support for eventfd():
- shim_do_eventfd() and shim_do_eventfd2() emulation at LibOS level;
- new eventfd pseudo-FS;
- db_eventfd.c emulation to route eventfd calls to host OS at Pal
  level (implementation for Linux and Linux-SGX, stubs for Skeleton);
- new OCALL ocall_eventfd() for Linux-SGX Pal;
- LibOS regression test for eventfd.

This implementation of eventfd() correctly handles IPC between
threads of the same process; it also must handle IPC between parent/
child processes. The implementation currently doesn't support the
scenario where the host kernel signals the process via eventfd, but it
can easily be extended to do so. The implementation currently doesn't
have additional checks to prevent Iago attacks on eventfd.
Krishnakumar, Sudha 4 years ago
parent
commit
3ee41aa9ff

+ 1 - 0
LibOS/shim/include/shim_fs.h

@@ -569,6 +569,7 @@ extern struct shim_mount chroot_builtin_fs;
 extern struct shim_mount pipe_builtin_fs;
 extern struct shim_mount socket_builtin_fs;
 extern struct shim_mount epoll_builtin_fs;
+extern struct shim_mount eventfd_builtin_fs;
 
 /* proc file system */
 struct proc_nm_ops {

+ 1 - 0
LibOS/shim/include/shim_handle.h

@@ -50,6 +50,7 @@ enum shim_handle_type {
     TYPE_FUTEX,
     TYPE_STR,
     TYPE_EPOLL,
+    TYPE_EVENTFD
 };
 
 struct shim_handle;

+ 4 - 2
LibOS/shim/include/shim_table.h

@@ -480,6 +480,8 @@ ssize_t shim_do_recvmmsg(int sockfd, struct mmsghdr* msg, size_t vlen, int flags
 int shim_do_prlimit64(pid_t pid, int resource, const struct __kernel_rlimit64* new_rlim,
                       struct __kernel_rlimit64* old_rlim);
 ssize_t shim_do_sendmmsg(int sockfd, struct mmsghdr* msg, size_t vlen, int flags);
+int shim_do_eventfd2(unsigned int count, int flags);
+int shim_do_eventfd(unsigned int count);
 
 /* libos call implementation */
 int shim_do_msgpersist(int msqid, int cmd);
@@ -781,14 +783,14 @@ int shim_epoll_pwait(int epfd, struct __kernel_epoll_event* events, int maxevent
                      const __sigset_t* sigmask, size_t sigsetsize);
 int shim_signalfd(int ufd, __sigset_t* user_mask, size_t sizemask);
 int shim_timerfd_create(int clockid, int flags);
-int shim_eventfd(int count);
+int shim_eventfd(unsigned int count);
 int shim_fallocate(int fd, int mode, loff_t offset, loff_t len);
 int shim_timerfd_settime(int ufd, int flags, const struct __kernel_itimerspec* utmr,
                          struct __kernel_itimerspec* otmr);
 int shim_timerfd_gettime(int ufd, struct __kernel_itimerspec* otmr);
 int shim_accept4(int sockfd, struct sockaddr* addr, socklen_t* addrlen, int flags);
 int shim_signalfd4(int ufd, __sigset_t* user_mask, size_t sizemask, int flags);
-int shim_eventfd2(int count, int flags);
+int shim_eventfd2(unsigned int count, int flags);
 int shim_epoll_create1(int flags);
 int shim_dup3(int oldfd, int newfd, int flags);
 int shim_pipe2(int* fildes, int flags);

+ 1 - 1
LibOS/shim/src/Makefile

@@ -39,7 +39,7 @@ files_to_install = $(addprefix $(RUNTIME_DIR)/,$(files_to_build))
 defs	= -DIN_SHIM
 CFLAGS += $(defs)
 ASFLAGS += $(defs)
-fs	= chroot str pipe socket proc dev
+fs	= chroot str pipe socket proc dev eventfd
 ipcns	= pid sysv
 objs	= $(addprefix bookkeep/shim_,handle vma thread signal) \
 	  $(patsubst %.c,%,$(wildcard utils/*.c)) \

+ 110 - 0
LibOS/shim/src/fs/eventfd/fs.c

@@ -0,0 +1,110 @@
+/* Copyright (C) 2019 Intel Corporation
+   This file is part of Graphene Library OS.
+
+   Graphene Library OS is free software: you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License
+   as published by the Free Software Foundation, either version 3 of the
+   License, or (at your option) any later version.
+
+   Graphene Library OS is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+ * fs.c
+ *
+ * This file contains the implementation of the 'eventfd' pseudo-filesystem.
+ */
+
+#include <asm/fcntl.h>
+#include <asm/unistd.h>
+#include <errno.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <pal.h>
+#include <shim_internal.h>
+#include <shim_handle.h>
+#include <shim_fs.h>
+
+/* Read from the eventfd behind `hdl` into `buf` through the PAL stream API.
+ * A zero-length request yields 0; a buffer smaller than a uint64_t yields -EINVAL;
+ * a PAL read that transfers nothing yields -PAL_ERRNO. On success the number of
+ * bytes read is returned. */
+static ssize_t eventfd_read(struct shim_handle* hdl, void* buf, size_t count) {
+    if (count < sizeof(uint64_t)) {
+        /* empty requests are a no-op, all other short buffers are invalid */
+        return count ? -EINVAL : 0;
+    }
+
+    PAL_NUM bytes = DkStreamRead(hdl->pal_handle, 0, count, buf, NULL, 0);
+    if (bytes <= 0) {
+        return -PAL_ERRNO;
+    }
+
+    /* a successful transfer is expected to be exactly one 64-bit value */
+    assert((ssize_t ) bytes == sizeof(uint64_t));
+    return (ssize_t) bytes;
+}
+
+/* Write `buf` to the eventfd behind `hdl` through the PAL stream API.
+ * A zero-length request yields 0; a buffer smaller than a uint64_t yields -EINVAL;
+ * a PAL write that transfers nothing yields -PAL_ERRNO. On success the number of
+ * bytes written is returned. */
+static ssize_t eventfd_write(struct shim_handle* hdl, const void* buf, size_t count) {
+    if (count < sizeof(uint64_t)) {
+        /* empty requests are a no-op, all other short buffers are invalid */
+        return count ? -EINVAL : 0;
+    }
+
+    PAL_NUM bytes = DkStreamWrite(hdl->pal_handle, 0, count, (void *) buf, NULL);
+    if (bytes <= 0) {
+        return -PAL_ERRNO;
+    }
+
+    /* a successful transfer is expected to be exactly one 64-bit value */
+    assert((ssize_t ) bytes == sizeof(uint64_t));
+    return (ssize_t) bytes;
+}
+
+/* Poll callback of the eventfd filesystem.
+ * With the handle lock held, queries the PAL stream attributes and returns:
+ *   -EBADF      if the handle has no PAL handle,
+ *   -PAL_ERRNO  if the attribute query fails,
+ *   the pending size reported by the PAL when `poll_type` is FS_POLL_SZ,
+ *   otherwise a mask built from FS_POLL_ER / FS_POLL_RD / FS_POLL_WR. */
+static off_t eventfd_poll(struct shim_handle* hdl, int poll_type) {
+    off_t result = 0;
+    PAL_STREAM_ATTR attr;
+
+    lock(&hdl->lock);
+
+    if (!hdl->pal_handle) {
+        result = -EBADF;
+    } else if (!DkStreamAttributesQueryByHandle(hdl->pal_handle, &attr)) {
+        result = -PAL_ERRNO;
+    } else if (poll_type == FS_POLL_SZ) {
+        result = attr.pending_size;
+    } else {
+        /* the error bit is reported regardless of what the caller asked for */
+        if (attr.disconnected)
+            result |= FS_POLL_ER;
+        if (attr.readable && (poll_type & FS_POLL_RD))
+            result |= FS_POLL_RD;
+        if (attr.writable && (poll_type & FS_POLL_WR))
+            result |= FS_POLL_WR;
+    }
+
+    unlock(&hdl->lock);
+    return result;
+}
+
+/* File operations implemented by the eventfd filesystem: read, write and poll only. */
+struct shim_fs_ops eventfd_fs_ops = {
+    .read  = eventfd_read,
+    .write = eventfd_write,
+    .poll  = eventfd_poll,
+};
+
+/* Built-in mount descriptor that ties the "eventfd" type name to the operations above. */
+struct shim_mount eventfd_builtin_fs = {
+    .type   = "eventfd",
+    .fs_ops = &eventfd_fs_ops,
+};

+ 2 - 1
LibOS/shim/src/fs/shim_fs.c

@@ -46,13 +46,14 @@ struct shim_fs mountable_fs [NUM_MOUNTABLE_FS] = {
         { .name = "dev",    .fs_ops = &dev_fs_ops,    .d_ops = &dev_d_ops,    },
     };
 
-#define NUM_BUILTIN_FS      4
+#define NUM_BUILTIN_FS      5
 
 struct shim_mount * builtin_fs [NUM_BUILTIN_FS] = {
                 &chroot_builtin_fs,
                 &pipe_builtin_fs,
                 &socket_builtin_fs,
                 &epoll_builtin_fs,
+                &eventfd_builtin_fs,
         };
 
 static struct shim_lock mount_mgr_lock;

+ 3 - 3
LibOS/shim/src/shim_syscalls.c

@@ -963,8 +963,6 @@ SHIM_SYSCALL_PASSTHROUGH(signalfd, 3, int, int, ufd, __sigset_t*, user_mask, siz
 
 SHIM_SYSCALL_PASSTHROUGH(timerfd_create, 2, int, int, clockid, int, flags)
 
-SHIM_SYSCALL_PASSTHROUGH(eventfd, 1, int, int, count)
-
 SHIM_SYSCALL_PASSTHROUGH(fallocate, 4, int, int, fd, int, mode, loff_t, offset, loff_t, len)
 
 SHIM_SYSCALL_PASSTHROUGH(timerfd_settime, 4, int, int, ufd, int, flags,
@@ -979,7 +977,9 @@ DEFINE_SHIM_SYSCALL(accept4, 4, shim_do_accept4, int, int, sockfd, struct sockad
 SHIM_SYSCALL_PASSTHROUGH(signalfd4, 4, int, int, ufd, __sigset_t*, user_mask, size_t, sizemask, int,
                          flags)
 
-SHIM_SYSCALL_PASSTHROUGH(eventfd2, 2, int, int, count, int, flags)
+DEFINE_SHIM_SYSCALL(eventfd, 1, shim_do_eventfd, int, unsigned int, count)
+
+DEFINE_SHIM_SYSCALL (eventfd2, 2, shim_do_eventfd2, int, unsigned int, count, int, flags)
 
 /* epoll_create1: sys/shim_epoll.c */
 DEFINE_SHIM_SYSCALL(epoll_create1, 1, shim_do_epoll_create1, int, int, flags)

+ 89 - 0
LibOS/shim/src/sys/shim_eventfd.c

@@ -0,0 +1,89 @@
+/* Copyright (C) 2019 Intel Corporation
+   This file is part of Graphene Library OS.
+
+   Graphene Library OS is free software: you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License
+   as published by the Free Software Foundation, either version 3 of the
+   License, or (at your option) any later version.
+
+   Graphene Library OS is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+ * shim_eventfd.c
+ *
+ * Implementation of system calls "eventfd" and "eventfd2".
+ */
+
+#include <asm/fcntl.h>
+#include <sys/eventfd.h>
+#include <pal.h>
+#include <pal_error.h>
+#include <shim_internal.h>
+#include <shim_utils.h>
+#include <shim_table.h>
+#include <shim_handle.h>
+#include <shim_fs.h>
+
+/* Open an eventfd stream through the PAL and store the resulting PAL handle in `*efd`.
+ *
+ * `count` is the initial counter value (initval) and `flags` are the EFD_* flags given by
+ * the application. Returns 0 on success, -EINVAL if `flags` contains bits other than
+ * EFD_NONBLOCK / EFD_CLOEXEC / EFD_SEMAPHORE, or -PAL_ERRNO if the PAL fails to open
+ * the stream. `*efd` is only written on success. */
+static int create_eventfd(PAL_HANDLE* efd, unsigned count, int flags) {
+    /* Linux rejects unsupported flag bits with EINVAL; without this check they would be
+     * silently dropped by the translation below. */
+    if (flags & ~(EFD_NONBLOCK | EFD_CLOEXEC | EFD_SEMAPHORE))
+        return -EINVAL;
+
+    PAL_HANDLE hdl = NULL;
+    int pal_flags = 0;
+
+    pal_flags |= flags & EFD_NONBLOCK ? PAL_OPTION_NONBLOCK : 0;
+    pal_flags |= flags & EFD_CLOEXEC ? PAL_OPTION_CLOEXEC : 0;
+    pal_flags |= flags & EFD_SEMAPHORE ? PAL_OPTION_EFD_SEMAPHORE : 0;
+
+    /* eventfd() requires count (aka initval) but PAL's DkStreamOpen() doesn't have such an
+     * argument. Using the create arg as a work-around (note: initval is uint32 but create is
+     * int32). */
+    if (!(hdl = DkStreamOpen("eventfd:", 0, 0, count, pal_flags))) {
+        debug("eventfd open failure\n");
+        return -PAL_ERRNO;
+    }
+
+    *efd = hdl;
+    return 0;
+
+}
+
+/* Emulation of the eventfd2() system call.
+ * Allocates a LibOS handle of type TYPE_EVENTFD, backs it with a PAL eventfd created with
+ * initial value `count` and `flags`, and installs it in the FD table.
+ * Returns the new virtual FD, -ENOMEM if no handle could be allocated, or the negative
+ * error produced by create_eventfd() / set_new_fd_handle(). */
+int shim_do_eventfd2(unsigned int count, int flags) {
+    struct shim_handle* hdl = get_new_handle();
+    if (!hdl)
+        return -ENOMEM;
+
+    hdl->type = TYPE_EVENTFD;
+    set_handle_fs(hdl, &eventfd_builtin_fs);
+    hdl->flags = O_RDWR;
+    hdl->acc_mode = MAY_READ | MAY_WRITE;
+
+    int ret = create_eventfd(&hdl->pal_handle, count, flags);
+    if (ret >= 0) {
+        /* only the close-on-exec bit is relevant for the FD table entry */
+        int fd_flags = flags & EFD_CLOEXEC ? FD_CLOEXEC : 0;
+        ret = set_new_fd_handle(hdl, fd_flags, NULL);
+    }
+
+    /* get_new_handle() above increments hdl's refcount, and set_new_fd_handle() increments
+     * it once more, so the reference taken here is dropped on every path. */
+    put_handle(hdl);
+    return ret;
+
+}
+
+/* Emulation of the legacy eventfd() system call: eventfd2() with no flags set. */
+int shim_do_eventfd(unsigned int count) {
+    const int no_flags = 0;
+
+    return shim_do_eventfd2(count, no_flags);
+}

+ 1 - 0
LibOS/shim/test/regression/Makefile

@@ -22,6 +22,7 @@ CFLAGS-openmp = -fopenmp
 CFLAGS-multi_pthread = -pthread
 CFLAGS-exit_group = -pthread
 CFLAGS-abort_multithread = -pthread
+CFLAGS-eventfd = -pthread
 
 %: %.c
 	$(call cmd,csingle)

+ 163 - 0
LibOS/shim/test/regression/eventfd.c

@@ -0,0 +1,163 @@
+#include <errno.h>
+
+#include <poll.h>
+#include <pthread.h>
+
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <sys/eventfd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define MAX_EFDS 3
+
+int efds[MAX_EFDS] = { 0 };
+
+/* Writer thread of the poll test.
+ * `arg` points to an array of MAX_EFDS eventfd descriptors. The thread prints them and
+ * then, once per second, writes an increasing counter value (starting at 10) to each
+ * descriptor in turn. A failed or short write is reported on stdout. Always returns NULL. */
+void* write_eventfd_thread(void* arg) {
+    uint64_t count = 10;
+
+    /* named differently from the global `efds` to avoid shadowing it */
+    int* thread_efds = (int*) arg;
+
+    if (!arg) {
+        printf("arg is NULL\n");
+        return NULL;
+    }
+
+    printf("%s:got here\n", __func__);
+
+    for (int i = 0; i < MAX_EFDS; i++) {
+        printf("%s: efd = %d\n", __func__, thread_efds[i]);
+    }
+
+    for (int i = 0; i < MAX_EFDS; i++) {
+        sleep(1);
+
+        errno = 0;
+        ssize_t written = write(thread_efds[i], &count, sizeof(count));
+        if (written != (ssize_t) sizeof(count)) {
+            printf("%s: write to efd %d failed, ret=%zd, errno=%d\n", __func__,
+                   thread_efds[i], written, errno);
+        }
+
+        count += 1;
+    }
+
+    return NULL;
+}
+
+/* This function is used to test polling on a group of eventfd descriptors.
+ * It creates MAX_EFDS eventfds, starts write_eventfd_thread() to signal each of them, and
+ * polls until a poll() call times out (5 s) or fails. Every readable descriptor is read once.
+ * To support regression testing, a positive value is returned for any error case: eventfd
+ * or thread creation failure, poll/read failure, or fewer than MAX_EFDS events observed.
+ * Returns 0 on success. All descriptors opened here are closed before returning. */
+int eventfd_using_poll() {
+    int ret = 0;
+    struct pollfd pollfds[MAX_EFDS];
+    pthread_t tid = 0;
+    uint64_t count = 0;
+    int poll_ret = 0;
+    int nread_events = 0;
+    int nopened = 0;
+    int err = 0;
+
+    for (int i = 0; i < MAX_EFDS; i++) {
+        efds[i] = eventfd(0, 0);
+
+        if (efds[i] < 0) {
+            perror("eventfd failed");
+            ret = 1;
+            goto out_close;
+        }
+
+        nopened++;
+        printf("efd = %d\n", efds[i]);
+
+        pollfds[i].fd = efds[i];
+        pollfds[i].events = POLLIN;
+        pollfds[i].revents = 0;
+    }
+
+    /* pthread_create() reports failure through its return value, not through errno */
+    err = pthread_create(&tid, NULL, write_eventfd_thread, efds);
+
+    if (err != 0) {
+        fprintf(stderr, "error in thread creation: %s\n", strerror(err));
+        ret = 1;
+        goto out_close;
+    }
+
+    while (1) {
+        poll_ret = poll(pollfds, MAX_EFDS, 5000);
+
+        if (poll_ret == 0) {
+            printf("Poll timed out. Exiting.\n");
+            break;
+        }
+
+        if (poll_ret < 0) {
+            perror("error from poll");
+            ret = 1;
+            break;
+        }
+
+        for (int i = 0; i < MAX_EFDS; i++) {
+            if (pollfds[i].revents & POLLIN) {
+                pollfds[i].revents = 0;
+                errno = 0;
+
+                ssize_t nbytes = read(pollfds[i].fd, &count, sizeof(count));
+                if (nbytes != (ssize_t) sizeof(count)) {
+                    /* stop polling: a descriptor that stays readable but cannot be read
+                     * would otherwise keep this loop spinning */
+                    perror("read from eventfd failed");
+                    ret = 1;
+                    goto out_join;
+                }
+
+                printf("fd set=%d\n", pollfds[i].fd);
+                printf("efd = %d, count: %llu, errno=%d\n", pollfds[i].fd,
+                        (unsigned long long) count, errno);
+                nread_events++;
+            }
+        }
+    }
+
+    if (nread_events == MAX_EFDS) {
+        printf("%s completed successfully\n", __func__);
+    } else {
+        printf("%s: nread_events=%d, MAX_EFDS=%d\n", __func__, nread_events, MAX_EFDS);
+        ret = 1;
+    }
+
+out_join:
+    pthread_join(tid, NULL);
+
+out_close:
+    for (int i = 0; i < nopened; i++) {
+        close(efds[i]);
+    }
+
+    return ret;
+}
+
+/* This function is used to test various flags supported while creating eventfd descriptors.
+ * For each flag combination it creates an eventfd, writes the value 5 twice, reads the
+ * counter back (expected to be the sum, 10) and, for non-blocking descriptors, checks that
+ * a second read on the now-drained counter fails instead of blocking.
+ * Note: EFD_SEMAPHORE has not been tested.
+ * To support regression testing, a positive value is returned for any error case;
+ * 0 is returned on success. */
+int eventfd_using_various_flags() {
+    uint64_t count = 0;
+    int efd = 0;
+    int eventfd_flags[] = { 0, EFD_NONBLOCK, EFD_CLOEXEC, EFD_NONBLOCK | EFD_CLOEXEC };
+    int nflags = (int) (sizeof(eventfd_flags) / sizeof(eventfd_flags[0]));
+
+    for (int i = 0; i < nflags; i++) {
+        printf("iteration #-%d, flags=%d\n", i, eventfd_flags[i]);
+
+        efd = eventfd(0, eventfd_flags[i]);
+
+        if (efd < 0) {
+            perror("eventfd failed");
+            printf("eventfd error for iteration #-%d, flags-%d\n", i, eventfd_flags[i]);
+            return 1;
+        }
+
+        count = 5;
+        if (eventfd_write(efd, count) != 0 || eventfd_write(efd, count) != 0) {
+            perror("eventfd_write failed");
+            close(efd);
+            return 1;
+        }
+
+        count = 0;
+        errno = 0;
+        if (eventfd_read(efd, &count) != 0) {
+            perror("eventfd_read failed");
+            close(efd);
+            return 1;
+        }
+        printf("efd = %d, count: %llu, errno=%d\n", efd, (unsigned long long) count, errno);
+
+        /* without EFD_SEMAPHORE a read returns the accumulated counter: 5 + 5 */
+        if (count != 10) {
+            printf("unexpected eventfd counter for iteration #-%d\n", i);
+            close(efd);
+            return 1;
+        }
+
+        /* calling the second read would block if flags doesn't have EFD_NONBLOCK */
+        if (eventfd_flags[i] & EFD_NONBLOCK) {
+            count = 0;
+            errno = 0;
+            int second_read = eventfd_read(efd, &count);
+            printf("efd = %d, count: %llu, errno=%d\n", efd, (unsigned long long) count, errno);
+
+            /* the counter was reset by the first read, so this read must not succeed */
+            if (second_read == 0) {
+                printf("non-blocking read on drained eventfd unexpectedly succeeded\n");
+                close(efd);
+                return 1;
+            }
+        }
+
+        close(efd);
+    }
+
+    printf("%s completed successfully\n", __func__);
+
+    return 0;
+}
+
+/* Runs both eventfd sub-tests; the exit status is the sum of their return values,
+ * i.e. 0 only if both succeeded. */
+int main(int argc, char* argv[]) {
+    int failures = eventfd_using_poll();
+
+    failures += eventfd_using_various_flags();
+    return failures;
+}

+ 7 - 0
LibOS/shim/test/regression/test_libos.py

@@ -311,6 +311,13 @@ class TC_30_Syscall(RegressionTestCase):
         self.assertIn('OK on sigaltstack in main thread', stdout)
         self.assertIn('done exiting', stdout)
 
+    def test_070_eventfd(self):
+        stdout, stderr = self.run_binary(['eventfd'])
+
+        # Eventfd Test
+        self.assertIn('eventfd_using_poll completed successfully', stdout)
+        self.assertIn('eventfd_using_various_flags completed successfully', stdout)
+
 @unittest.skipUnless(HAS_SGX,
     'This test is only meaningful on SGX PAL because only SGX catches raw '
     'syscalls and redirects to Graphene\'s LibOS. If we will add seccomp to '

+ 4 - 0
Pal/src/db_streams.c

@@ -45,6 +45,7 @@ extern struct handle_ops mutex_ops;
 extern struct handle_ops event_ops;
 extern struct handle_ops gipc_ops;
 extern struct handle_ops mcast_ops;
+extern struct handle_ops eventfd_ops;
 
 const struct handle_ops* pal_handle_ops[PAL_HANDLE_TYPE_BOUND] = {
     [pal_type_file]    = &file_ops,
@@ -64,6 +65,7 @@ const struct handle_ops* pal_handle_ops[PAL_HANDLE_TYPE_BOUND] = {
     [pal_type_mutex]   = &mutex_ops,
     [pal_type_event]   = &event_ops,
     [pal_type_gipc]    = &gipc_ops,
+    [pal_type_eventfd] = &eventfd_ops,
 };
 
 /* parse_stream_uri scan the uri, seperate prefix and search for
@@ -106,6 +108,8 @@ static int parse_stream_uri(const char** uri, char** prefix, struct handle_ops**
                 hops = &tcp_ops;
             else if (strstartswith_static(u, "udp.srv"))
                 hops = &udp_ops;
+            else if (strstartswith_static(u, "eventfd"))
+                hops = &eventfd_ops;
             break;
 
         case 8:

+ 1 - 1
Pal/src/host/FreeBSD/pal_host.h

@@ -47,7 +47,7 @@ typedef struct mutex_handle {
 
 #define _DkInternalLock _DkMutexLock
 #define _DkInternalUnlock _DkMutexUnlock
-#define MAX_FDS 3
+
 typedef union pal_handle
 {
     /* TSAI: Here we define the internal types of PAL_HANDLE

+ 1 - 1
Pal/src/host/Linux-SGX/Makefile

@@ -12,7 +12,7 @@ host_files = libpal-Linux-SGX.a pal-sgx debugger/sgx_gdb.so pal.map generated_of
 defs	= -DIN_PAL -DPAL_DIR=$(PAL_DIR) -DRUNTIME_DIR=$(RUNTIME_DIR)
 CFLAGS += $(defs)
 ASFLAGS += $(defs)
-enclave-objs = $(addprefix db_,files devices pipes sockets streams memory \
+enclave-objs = $(addprefix db_,files devices pipes eventfd sockets streams memory \
 		 threading mutex events process object main rtld \
 		 exception misc ipc spinlock) \
 	       $(addprefix enclave_,ocalls ecalls framework platform pages untrusted)

+ 187 - 0
Pal/src/host/Linux-SGX/db_eventfd.c

@@ -0,0 +1,187 @@
+/* Copyright (C) 2019 Intel Corporation
+   This file is part of Graphene Library OS.
+
+   Graphene Library OS is free software: you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License
+   as published by the Free Software Foundation, either version 3 of the
+   License, or (at your option) any later version.
+
+   Graphene Library OS is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+ * db_eventfd.c
+ *
+ * This file contains operations to handle streams with URIs that have
+ * "eventfd:".
+ */
+
+#include <asm/fcntl.h>
+#include <asm/poll.h>
+#include <linux/un.h>
+#include <linux/types.h>
+#include <sys/eventfd.h>
+
+#include "api.h"
+#include "pal_defs.h"
+#include "pal_linux_defs.h"
+#include "pal.h"
+#include "pal_internal.h"
+#include "pal_linux.h"
+#include "pal_linux_error.h"
+#include "pal_error.h"
+#include "pal_security.h"
+#include "pal_debug.h"
+
+/* Translate PAL stream options into the corresponding host EFD_* flags. */
+static inline int eventfd_type(int options) {
+    int efd_flags = 0;
+
+    efd_flags |= (options & PAL_OPTION_NONBLOCK) ? EFD_NONBLOCK : 0;
+    efd_flags |= (options & PAL_OPTION_CLOEXEC) ? EFD_CLOEXEC : 0;
+    efd_flags |= (options & PAL_OPTION_EFD_SEMAPHORE) ? EFD_SEMAPHORE : 0;
+
+    return efd_flags;
+}
+
+/* Open handler for "eventfd:" streams.
+ * `type` must be eventfd and `uri` must be empty; `access` & `share` are unused,
+ * `create` holds eventfd's initval, `options` holds eventfd's flags.
+ * On success a newly allocated eventfd handle is stored in `*handle` and 0 is returned.
+ * Fails with -PAL_ERROR_INVAL for a wrong type/URI, with the converted host error if the
+ * eventfd OCALL fails, and with -PAL_ERROR_NOMEM if the handle cannot be allocated. */
+static int eventfd_pal_open(PAL_HANDLE* handle, const char* type, const char* uri, int access,
+        int share, int create, int options) {
+    int ret;
+    __UNUSED(access);
+    __UNUSED(share);
+
+    if ((strcmp_static(type, "eventfd") != 0) || (*uri != '\0')) {
+        return -PAL_ERROR_INVAL;
+    }
+
+    /* Using create arg as a work-around (note: initval is uint32 but create is int32).*/
+    ret = ocall_eventfd(create, eventfd_type(options));
+
+    if (IS_ERR(ret))
+        return unix_to_pal_error(ERRNO(ret));
+
+    PAL_HANDLE hdl = malloc(HANDLE_SIZE(eventfd));
+    if (!hdl) {
+        /* do not leak the host eventfd when the handle cannot be allocated */
+        ocall_close(ret);
+        return -PAL_ERROR_NOMEM;
+    }
+    SET_HANDLE_TYPE(hdl, eventfd);
+
+    /* Note: using index 0, given that there is only 1 eventfd FD per pal-handle. */
+    HANDLE_HDR(hdl)->flags = RFD(0) | WFD(0) | WRITABLE(0);
+
+    hdl->eventfd.fd = ret;
+    hdl->eventfd.nonblocking = (options & PAL_OPTION_NONBLOCK) ? PAL_TRUE : PAL_FALSE;
+    *handle = hdl;
+
+    return 0;
+
+}
+
+/* Read handler for eventfd handles.
+ * Rejects a non-zero `offset` and a `len` smaller than a uint64_t with -PAL_ERROR_INVAL,
+ * and a handle of another type with -PAL_ERROR_NOTCONNECTION. Otherwise forwards the read
+ * to the host and returns the number of bytes read, the converted host error, or
+ * -PAL_ERROR_ENDOFSTREAM if the host read returned 0 bytes. */
+static int64_t eventfd_pal_read(PAL_HANDLE handle, uint64_t offset, uint64_t len, void* buffer) {
+    if (offset != 0)
+        return -PAL_ERROR_INVAL;
+
+    if (!IS_HANDLE_TYPE(handle, eventfd))
+        return -PAL_ERROR_NOTCONNECTION;
+
+    if (len < sizeof(uint64_t))
+        return -PAL_ERROR_INVAL;
+
+    /* TODO: verify that the value returned in buffer is somehow meaningful
+     * (to prevent Iago attacks) */
+    int nread = ocall_read(handle->eventfd.fd, buffer, len);
+
+    if (IS_ERR(nread))
+        return unix_to_pal_error(ERRNO(nread));
+
+    if (nread == 0)
+        return -PAL_ERROR_ENDOFSTREAM;
+
+    return nread;
+}
+
+/* Write handler for eventfd handles.
+ * Rejects a non-zero `offset` and a `len` smaller than a uint64_t with -PAL_ERROR_INVAL,
+ * and a handle of another type with -PAL_ERROR_NOTCONNECTION. Otherwise forwards the write
+ * to the host and returns the number of bytes written or the converted host error.
+ * As a side effect the WRITABLE(0) bit in the handle header is kept up to date. */
+static int64_t eventfd_pal_write(PAL_HANDLE handle, uint64_t offset, uint64_t len,
+        const void* buffer) {
+    if (offset != 0)
+        return -PAL_ERROR_INVAL;
+
+    if (!IS_HANDLE_TYPE(handle, eventfd))
+        return -PAL_ERROR_NOTCONNECTION;
+
+    if (len < sizeof(uint64_t))
+        return -PAL_ERROR_INVAL;
+
+    int nwritten = ocall_write(handle->eventfd.fd, buffer, len);
+    PAL_FLG writable_bit = WRITABLE(0);
+
+    if (IS_ERR(nwritten)) {
+        /* a write that would block means the eventfd is currently not writable */
+        if (ERRNO(nwritten) == EAGAIN)
+            HANDLE_HDR(handle)->flags &= ~writable_bit;
+        return unix_to_pal_error(ERRNO(nwritten));
+    }
+
+    /* whether fd is writable or not, gets updated here,
+     * to optimize polling logic in _DkObjectsWaitAny */
+    if ((uint64_t) nwritten != sizeof(uint64_t))
+        HANDLE_HDR(handle)->flags &= ~writable_bit;
+    else
+        HANDLE_HDR(handle)->flags |= writable_bit;
+
+    return nwritten;
+}
+
+/* Attribute query for eventfd handles; invoked during poll operation on eventfd from LibOS.
+ * Performs a zero-timeout host poll on the eventfd and fills `attr` with the handle type,
+ * the readable/writable state, the disconnected bit taken from the handle's ERROR(0) flag,
+ * the non-blocking mode and the host FD.
+ * Returns 0 on success, -PAL_ERROR_BADHANDLE for a poisoned handle, or the converted host
+ * error if the poll OCALL fails. */
+static int eventfd_pal_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr) {
+    if (handle->generic.fds[0] == PAL_IDX_POISON)
+        return -PAL_ERROR_BADHANDLE;
+
+    attr->handle_type = PAL_GET_TYPE(handle);
+
+    int efd = handle->eventfd.fd;
+    int flags = HANDLE_HDR(handle)->flags;
+
+    /* ask the host about both directions: LibOS's eventfd_poll() consumes `writable` as
+     * well as `readable`, so both must be populated here */
+    struct pollfd pfd = { .fd = efd, .events = POLLIN | POLLOUT, .revents = 0 };
+    int ret = ocall_poll(&pfd, 1, 0);
+
+    if (IS_ERR(ret))
+        return unix_to_pal_error(ERRNO(ret));
+
+    /* as before, an FD reporting an error/hangup/invalid condition is not treated as ready */
+    const int err_events = POLLERR | POLLHUP | POLLNVAL;
+    attr->readable = (ret == 1 && (pfd.revents & (POLLIN | err_events)) == POLLIN);
+    attr->writable = (ret == 1 && (pfd.revents & (POLLOUT | err_events)) == POLLOUT);
+    attr->disconnected = flags & ERROR(0);
+    attr->nonblocking = handle->eventfd.nonblocking;
+
+    /* For future use, so that Linux host kernel can send notifications to user-space apps.
+     * App receives virtual FD from LibOS, but the Linux-host eventfd is memorized
+     * here, such that this Linux-host eventfd can be retrieved (by LibOS) during app's ioctl(). */
+    attr->no_of_fds = 1;
+    attr->fds[0] = efd;
+
+    return 0;
+}
+
+/* Close handler: closes the host eventfd of an eventfd handle (once) and poisons the stored
+ * FD. Handles of any other type are left untouched. Always returns 0. */
+static int eventfd_pal_close(PAL_HANDLE handle) {
+    if (IS_HANDLE_TYPE(handle, eventfd) && handle->eventfd.fd != PAL_IDX_POISON) {
+        ocall_close(handle->eventfd.fd);
+        handle->eventfd.fd = PAL_IDX_POISON;
+    }
+
+    return 0;
+}
+
+/* Stream operations available on "eventfd:" handles. */
+struct handle_ops eventfd_ops = {
+    .open           = eventfd_pal_open,
+    .read           = eventfd_pal_read,
+    .write          = eventfd_pal_write,
+    .close          = eventfd_pal_close,
+    .attrquerybyhdl = eventfd_pal_attrquerybyhdl,
+};

+ 20 - 0
Pal/src/host/Linux-SGX/enclave_ocalls.c

@@ -1243,3 +1243,23 @@ reset:
 out:
     return retval;
 }
+
+/* Enclave-side stub of the eventfd OCALL: marshals `initval` and `flags` into an argument
+ * structure allocated on the untrusted stack, performs OCALL_EVENTFD and returns its result.
+ * Returns -EPERM if the argument structure cannot be allocated. The untrusted stack is
+ * reset on every path. */
+int ocall_eventfd (unsigned int initval, int flags)
+{
+    ms_ocall_eventfd_t * ms = sgx_alloc_on_ustack(sizeof(*ms));
+
+    if (!ms) {
+        sgx_reset_ustack();
+        return -EPERM;
+    }
+
+    ms->ms_initval = initval;
+    ms->ms_flags   = flags;
+
+    int retval = sgx_ocall(OCALL_EVENTFD, ms);
+
+    sgx_reset_ustack();
+    return retval;
+}

+ 2 - 0
Pal/src/host/Linux-SGX/enclave_ocalls.h

@@ -104,3 +104,5 @@ int ocall_load_debug (const char * command);
 int ocall_get_attestation(const sgx_spid_t* spid, const char* subkey, bool linkable,
                           const sgx_report_t* report, const sgx_quote_nonce_t* nonce,
                           sgx_attestation_t* attestation);
+int ocall_eventfd (unsigned int initval, int flags);
+

+ 6 - 0
Pal/src/host/Linux-SGX/ocall_types.h

@@ -59,6 +59,7 @@ enum {
     OCALL_DELETE,
     OCALL_LOAD_DEBUG,
     OCALL_GET_ATTESTATION,
+    OCALL_EVENTFD,
     OCALL_NR,
 };
 
@@ -286,4 +287,9 @@ typedef struct {
     sgx_attestation_t ms_attestation;
 } ms_ocall_get_attestation_t;
 
+typedef struct {
+    unsigned int ms_initval; /* initial counter value passed to the host eventfd2 syscall */
+    int          ms_flags;   /* flags passed to the host eventfd2 syscall */
+} ms_ocall_eventfd_t;
+
 #pragma pack(pop)

+ 6 - 1
Pal/src/host/Linux-SGX/pal_host.h

@@ -40,7 +40,6 @@ int _DkSpinUnlock (struct spinlock * lock);
 #define LOCK_INIT   { .value =  { 0 } }
 #define _DkInternalLock _DkSpinLock
 #define _DkInternalUnlock _DkSpinUnlock
-#define MAX_FDS 3
 
 void * malloc_untrusted (int size);
 void free_untrusted (void * mem);
@@ -113,6 +112,12 @@ typedef struct pal_handle
             PAL_BOL nonblocking;
         } pipeprv;
 
+        struct {
+            PAL_IDX fd;
+            /* TODO: add other flags in future, if needed (e.g., semaphore) */
+            PAL_BOL nonblocking;
+        } eventfd;
+
         struct {
             PAL_IDX fd_in, fd_out;
             PAL_IDX dev_type;

+ 13 - 1
Pal/src/host/Linux-SGX/sgx_enclave.c

@@ -670,6 +670,17 @@ static int sgx_ocall_delete(void * pms)
     return ret;
 }
 
+/* Untrusted-side handler of OCALL_EVENTFD: issues the host eventfd2 syscall with the initial
+ * value and flags marshalled in `pms` and returns the syscall result. */
+static int sgx_ocall_eventfd (void * pms)
+{
+    ms_ocall_eventfd_t * ms = (ms_ocall_eventfd_t *) pms;
+    ODEBUG(OCALL_EVENTFD, ms);
+
+    int ret = INLINE_SYSCALL(eventfd2, 2, ms->ms_initval, ms->ms_flags);
+    return ret;
+}
+
 void load_gdb_command (const char * command);
 
 static int sgx_ocall_load_debug(void * pms)
@@ -727,7 +738,8 @@ sgx_ocall_fn_t ocall_table[OCALL_NR] = {
         [OCALL_DELETE]          = sgx_ocall_delete,
         [OCALL_LOAD_DEBUG]      = sgx_ocall_load_debug,
         [OCALL_GET_ATTESTATION] = sgx_ocall_get_attestation,
-    };
+        [OCALL_EVENTFD]         = sgx_ocall_eventfd,
+};
 
 #define EDEBUG(code, ms) do {} while (0)
 

+ 1 - 1
Pal/src/host/Linux/Makefile

@@ -15,7 +15,7 @@ host_files = libpal-Linux.a pal.map
 defs	= -DIN_PAL -DPAL_DIR=$(PAL_DIR) -DRUNTIME_DIR=$(RUNTIME_DIR)
 CFLAGS += $(defs)
 ASFLAGS += $(defs)
-objs	= $(addprefix db_,files devices pipes sockets streams memory threading \
+objs	= $(addprefix db_,files devices pipes eventfd sockets streams memory threading \
 	    mutex events process object main rtld misc ipc \
 	    exception) clone-x86_64
 graphene_lib = .lib/graphene-lib.a

+ 186 - 0
Pal/src/host/Linux/db_eventfd.c

@@ -0,0 +1,186 @@
+/* Copyright (C) 2019 Intel Corporation
+   This file is part of Graphene Library OS.
+
+   Graphene Library OS is free software: you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License
+   as published by the Free Software Foundation, either version 3 of the
+   License, or (at your option) any later version.
+
+   Graphene Library OS is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/*
+ * db_eventfd.c
+ *
+ * This file contains operations to handle streams with URIs that have
+ * "eventfd:".
+ */
+
+#include <asm/fcntl.h>
+#include <asm/poll.h>
+#include <linux/un.h>
+#include <linux/time.h>
+#include <linux/types.h>
+#include <sys/eventfd.h>
+
+#include "api.h"
+#include "pal_defs.h"
+#include "pal_linux_defs.h"
+#include "pal.h"
+#include "pal_internal.h"
+#include "pal_linux.h"
+#include "pal_linux_error.h"
+#include "pal_error.h"
+#include "pal_security.h"
+#include "pal_debug.h"
+
+/* Translate PAL stream options into the corresponding host EFD_* flags. */
+static inline int eventfd_type(int options) {
+    int efd_flags = 0;
+
+    efd_flags |= (options & PAL_OPTION_NONBLOCK) ? EFD_NONBLOCK : 0;
+    efd_flags |= (options & PAL_OPTION_CLOEXEC) ? EFD_CLOEXEC : 0;
+    efd_flags |= (options & PAL_OPTION_EFD_SEMAPHORE) ? EFD_SEMAPHORE : 0;
+
+    return efd_flags;
+}
+
+/* Open handler for "eventfd:" streams.
+ * `type` must be eventfd and `uri` must be empty; `access` & `share` are unused,
+ * `create` holds eventfd's initval, `options` holds eventfd's flags.
+ * On success a newly allocated eventfd handle is stored in `*handle` and 0 is returned.
+ * Fails with -PAL_ERROR_INVAL for a wrong type/URI, with the converted host error if the
+ * eventfd2 syscall fails, and with -PAL_ERROR_NOMEM if the handle cannot be allocated. */
+static int eventfd_pal_open(PAL_HANDLE* handle, const char* type, const char* uri, int access,
+        int share, int create, int options) {
+    int ret;
+
+    __UNUSED(access);
+    __UNUSED(share);
+
+    if ((strcmp_static(type, "eventfd") != 0) || (*uri != '\0')) {
+        return -PAL_ERROR_INVAL;
+    }
+
+    /* Using create arg as a work-around (note: initval is uint32 but create is int32).*/
+    ret = INLINE_SYSCALL(eventfd2, 2, create, eventfd_type(options));
+
+    if (IS_ERR(ret))
+        return unix_to_pal_error(ERRNO(ret));
+
+    PAL_HANDLE hdl = malloc(HANDLE_SIZE(eventfd));
+    if (!hdl) {
+        /* do not leak the host eventfd when the handle cannot be allocated */
+        INLINE_SYSCALL(close, 1, ret);
+        return -PAL_ERROR_NOMEM;
+    }
+    SET_HANDLE_TYPE(hdl, eventfd);
+
+    /* Note: using index 0, given that there is only 1 eventfd FD per pal-handle. */
+    HANDLE_HDR(hdl)->flags = RFD(0) | WFD(0) | WRITABLE(0);
+
+    hdl->eventfd.fd = ret;
+    hdl->eventfd.nonblocking = (options & PAL_OPTION_NONBLOCK) ? PAL_TRUE : PAL_FALSE;
+    *handle = hdl;
+
+    return 0;
+
+}
+
+/* Read handler for eventfd handles.
+ * Rejects a non-zero `offset` and a `len` smaller than a uint64_t with -PAL_ERROR_INVAL,
+ * and a handle of another type with -PAL_ERROR_NOTCONNECTION. Otherwise issues the host
+ * read syscall and returns the number of bytes read, the converted host error, or
+ * -PAL_ERROR_ENDOFSTREAM if the host read returned 0 bytes. */
+static int64_t eventfd_pal_read(PAL_HANDLE handle, uint64_t offset, uint64_t len, void* buffer) {
+    if (offset != 0)
+        return -PAL_ERROR_INVAL;
+
+    if (!IS_HANDLE_TYPE(handle, eventfd))
+        return -PAL_ERROR_NOTCONNECTION;
+
+    if (len < sizeof(uint64_t))
+        return -PAL_ERROR_INVAL;
+
+    int nread = INLINE_SYSCALL(read, 3, handle->eventfd.fd, buffer, len);
+
+    if (IS_ERR(nread))
+        return unix_to_pal_error(ERRNO(nread));
+
+    if (nread == 0)
+        return -PAL_ERROR_ENDOFSTREAM;
+
+    return nread;
+}
+
+/* Write handler for eventfd handles.
+ * Rejects a non-zero `offset` and a `len` smaller than a uint64_t with -PAL_ERROR_INVAL,
+ * and a handle of another type with -PAL_ERROR_NOTCONNECTION. Otherwise issues the host
+ * write syscall and returns the number of bytes written or the converted host error.
+ * As a side effect the WRITABLE(0) bit in the handle header is kept up to date. */
+static int64_t eventfd_pal_write(PAL_HANDLE handle, uint64_t offset, uint64_t len,
+        const void* buffer) {
+    if (offset != 0)
+        return -PAL_ERROR_INVAL;
+
+    if (!IS_HANDLE_TYPE(handle, eventfd))
+        return -PAL_ERROR_NOTCONNECTION;
+
+    if (len < sizeof(uint64_t))
+        return -PAL_ERROR_INVAL;
+
+    int nwritten = INLINE_SYSCALL(write, 3, handle->eventfd.fd, buffer, len);
+    PAL_FLG writable_bit = WRITABLE(0);
+
+    if (IS_ERR(nwritten)) {
+        /* a write that would block means the eventfd is currently not writable */
+        if (ERRNO(nwritten) == EAGAIN)
+            HANDLE_HDR(handle)->flags &= ~writable_bit;
+        return unix_to_pal_error(ERRNO(nwritten));
+    }
+
+    /* whether fd is writable or not, gets updated here,
+     * to optimize polling logic in _DkObjectsWaitAny */
+    if ((uint64_t) nwritten != sizeof(uint64_t))
+        HANDLE_HDR(handle)->flags &= ~writable_bit;
+    else
+        HANDLE_HDR(handle)->flags |= writable_bit;
+
+    return nwritten;
+}
+
+/* Attribute query for eventfd handles; invoked during poll operation on eventfd from LibOS.
+ * Performs a zero-timeout host ppoll on the eventfd and fills `attr` with the handle type,
+ * the readable/writable state, the disconnected bit taken from the handle's ERROR(0) flag,
+ * the non-blocking mode and the host FD.
+ * Returns 0 on success, -PAL_ERROR_BADHANDLE for a poisoned handle, or the converted host
+ * error if the ppoll syscall fails. */
+static int eventfd_pal_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr) {
+    if (handle->generic.fds[0] == PAL_IDX_POISON)
+        return -PAL_ERROR_BADHANDLE;
+
+    attr->handle_type = PAL_GET_TYPE(handle);
+    int efd = handle->eventfd.fd;
+    int flags = HANDLE_HDR(handle)->flags;
+
+    /* ask the host about both directions: LibOS's eventfd_poll() consumes `writable` as
+     * well as `readable`, so both must be populated here */
+    struct pollfd pfd = { .fd = efd, .events = POLLIN | POLLOUT, .revents = 0 };
+    struct timespec tp = {0, 0};
+    int ret = INLINE_SYSCALL(ppoll, 5, &pfd, 1, &tp, NULL, 0);
+
+    if (IS_ERR(ret))
+        return unix_to_pal_error(ERRNO(ret));
+
+    /* as before, an FD reporting an error/hangup/invalid condition is not treated as ready */
+    const int err_events = POLLERR | POLLHUP | POLLNVAL;
+    attr->readable = (ret == 1 && (pfd.revents & (POLLIN | err_events)) == POLLIN);
+    attr->writable = (ret == 1 && (pfd.revents & (POLLOUT | err_events)) == POLLOUT);
+    attr->disconnected = flags & ERROR(0);
+    attr->nonblocking = handle->eventfd.nonblocking;
+
+    /* For future use, so that Linux host kernel can send notifications to user-space apps.
+     * App receives virtual FD from LibOS, but the Linux-host eventfd is memorized
+     * here, such that this Linux-host eventfd can be retrieved (by LibOS) during app's ioctl(). */
+    attr->no_of_fds = 1;
+    attr->fds[0] = efd;
+
+    return 0;
+}
+
+/* Close handler: closes the host eventfd of an eventfd handle (once) and poisons the stored
+ * FD. Handles of any other type are left untouched. Always returns 0. */
+static int eventfd_pal_close(PAL_HANDLE handle) {
+    if (IS_HANDLE_TYPE(handle, eventfd) && handle->eventfd.fd != PAL_IDX_POISON) {
+        INLINE_SYSCALL(close, 1, handle->eventfd.fd);
+        handle->eventfd.fd = PAL_IDX_POISON;
+    }
+
+    return 0;
+}
+
+/* Stream operations available on "eventfd:" handles. */
+struct handle_ops eventfd_ops = {
+    .open           = eventfd_pal_open,
+    .read           = eventfd_pal_read,
+    .write          = eventfd_pal_write,
+    .close          = eventfd_pal_close,
+    .attrquerybyhdl = eventfd_pal_attrquerybyhdl,
+};

+ 6 - 2
Pal/src/host/Linux/pal_host.h

@@ -67,8 +67,6 @@ typedef struct {
 #endif
 } PAL_RESERVED_HDR;
 
-#define MAX_FDS 3
-
 typedef struct pal_handle
 {
     /* TSAI: Here we define the internal types of PAL_HANDLE
@@ -106,6 +104,12 @@ typedef struct pal_handle
             PAL_BOL nonblocking;
         } pipeprv;
 
+        struct {
+            PAL_IDX fd;
+            /* TODO: add other flags in future, if needed (e.g., semaphore) */
+            PAL_BOL nonblocking;
+        } eventfd;
+
         struct {
             PAL_IDX fd_in, fd_out;
             PAL_IDX dev_type;

+ 1 - 1
Pal/src/host/Skeleton/Makefile

@@ -9,7 +9,7 @@ host_files = libpal-Skeleton.a pal.map
 defs	= -DIN_PAL
 CFLAGS += $(defs)
 ASFLAGS += $(defs)
-objs	= $(addprefix db_,files devices pipes sockets streams memory threading \
+objs	= $(addprefix db_,files devices pipes eventfd sockets streams memory threading \
 	    mutex events process object main rtld misc ipc exception)
 headers	= $(wildcard *.h) $(wildcard ../../*.h) $(wildcard ../../../lib/*.h)
 

+ 63 - 0
Pal/src/host/Skeleton/db_eventfd.c

@@ -0,0 +1,63 @@
+/* Copyright (C) 2019 Intel Corporation
+   This file is part of Graphene Library OS.
+
+   Graphene Library OS is free software: you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License
+   as published by the Free Software Foundation, either version 3 of the
+   License, or (at your option) any later version.
+
+   Graphene Library OS is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
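+/*
+ * db_eventfd.c
+ *
+ * This file contains operations to handle streams with URIs that start with
+ * "eventfd:". In the Skeleton PAL, all of these operations are stubs that
+ * return -PAL_ERROR_NOTIMPLEMENTED.
+ */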
+
+#include "pal_defs.h"
+#include "pal.h"
+#include "pal_internal.h"
+#include "pal_error.h"
+#include "api.h"
+
+/* Stub for opening an eventfd stream in the Skeleton PAL.
+ * `type` must be eventfd; `uri`, `access` and `share` are unused; `create` holds the
+ * eventfd's initval and `options` holds the eventfd's flags.
+ * Always returns -PAL_ERROR_NOTIMPLEMENTED. */
+static int eventfd_pal_open(PAL_HANDLE* handle,
+                            const char* type,
+                            const char* uri,
+                            int access,
+                            int share,
+                            int create,
+                            int options) {
+    return -PAL_ERROR_NOTIMPLEMENTED;
+}
+
+/* Stub for reading from an eventfd in the Skeleton PAL; `offset` does not apply to
+ * eventfd. Always returns -PAL_ERROR_NOTIMPLEMENTED. */
+static int64_t eventfd_pal_read(PAL_HANDLE handle,
+                                uint64_t offset,
+                                uint64_t len,
+                                void* buffer) {
+    return -PAL_ERROR_NOTIMPLEMENTED;
+}
+
+/* Stub for writing to an eventfd in the Skeleton PAL; `offset` does not apply to
+ * eventfd. Always returns -PAL_ERROR_NOTIMPLEMENTED. */
+static int64_t eventfd_pal_write(PAL_HANDLE handle,
+                                 uint64_t offset,
+                                 uint64_t len,
+                                 const void* buffer) {
+    return -PAL_ERROR_NOTIMPLEMENTED;
+}
+
+/* Stub for the attribute query that LibOS uses to poll an eventfd.
+ * Always returns -PAL_ERROR_NOTIMPLEMENTED in the Skeleton PAL. */
+static int eventfd_pal_attrquerybyhdl(PAL_HANDLE handle,
+                                      PAL_STREAM_ATTR* attr) {
+    return -PAL_ERROR_NOTIMPLEMENTED;
+}
+
+/* Stub for closing an eventfd handle in the Skeleton PAL.
+ * Always returns -PAL_ERROR_NOTIMPLEMENTED. */
+static int eventfd_pal_close(PAL_HANDLE handle)
+{
+    return -PAL_ERROR_NOTIMPLEMENTED;
+}
+
+/* Operations table for eventfd handles in the Skeleton PAL; every entry points at a stub
+ * that returns -PAL_ERROR_NOTIMPLEMENTED. */
+struct handle_ops eventfd_ops = {
+    /* lifecycle */
+    .open           = eventfd_pal_open,
+    .close          = eventfd_pal_close,
+    /* data transfer */
+    .read           = eventfd_pal_read,
+    .write          = eventfd_pal_write,
+    /* attribute query (used for polling) */
+    .attrquerybyhdl = eventfd_pal_attrquerybyhdl,
+};

+ 4 - 2
Pal/src/host/Skeleton/pal_host.h

@@ -34,8 +34,6 @@ typedef struct mutex_handle {
 #define LOCK_INIT   {}
 #define INIT_LOCK(lock) do {} while (0)
 
-#define MAX_FDS     3
-
 typedef struct pal_handle
 {
     /* TSAI: Here we define the internal types of PAL_HANDLE
@@ -66,6 +64,10 @@ typedef struct pal_handle
             PAL_IDX fd;
         } pipeprv;
 
+        struct {
+            PAL_IDX unused;
+        } eventfd;
+
         struct {
             PAL_IDX fd;
             PAL_IDX dev_type;

+ 12 - 1
Pal/src/pal.h

@@ -35,6 +35,10 @@ typedef uint32_t      PAL_FLG;
 typedef uint32_t      PAL_IDX;
 typedef bool          PAL_BOL;
 
+/* MAX_FDS is defined here rather than in each <host_kernel>/pal_host.h,
+ * since its value (3) is the same across all host kernels. */
+#define MAX_FDS 3
+
 #ifdef IN_PAL
 #include <atomic.h>
 typedef struct atomic_int PAL_REF;
@@ -142,10 +146,10 @@ enum {
     pal_type_mutex,
     pal_type_event,
     pal_type_gipc,
+    pal_type_eventfd,
     PAL_HANDLE_TYPE_BOUND,
 };
 
-
 #define PAL_IDX_POISON          ((PAL_IDX)-1) /* PAL identifier poison value */
 #define PAL_GET_TYPE(h)         (HANDLE_HDR(h)->type)
 #define PAL_CHECK_TYPE(h, t)    (PAL_GET_TYPE(h) == pal_type_##t)
@@ -324,6 +328,11 @@ DkProcessExit (PAL_NUM exitCode);
 #define PAL_OPTION_NONBLOCK     04000
 #define PAL_OPTION_MASK         04000
 
+/* CLOEXEC is generic for any stream.
+ * SEMAPHORE is specific to eventfd syscall. */
+#define PAL_OPTION_CLOEXEC       01000
+#define PAL_OPTION_EFD_SEMAPHORE 02000
+
 #define WITHIN_MASK(val, mask)  (((val)|(mask)) == (mask))
 
 PAL_HANDLE
@@ -377,6 +386,8 @@ typedef struct {
     PAL_BOL readable, writable, runnable;
     PAL_FLG share_flags;
     PAL_NUM pending_size;
+    PAL_IDX no_of_fds;
+    PAL_IDX fds[MAX_FDS];
     union {
         struct {
             PAL_NUM linger;