Browse Source

[Pal] Remove WRITABLE flag from PAL handles

Previously, the WRITABLE flag in PAL was used as a performance optimization
to skip the host-based ppoll() syscall. However, ppoll() is issued anyway to
update the READABLE flag. Therefore, there is no additional performance cost
in updating WRITABLE via the same ppoll(), and we can remove the complex
logic of updating WRITABLE on each write/send. Also, Graphene now has proper
emulation of select/poll/epoll via DkStreamsWaitEvents(), so there is
no performance benefit in caching WRITABLE anyway.
Dmitrii Kuvaiskii 4 years ago
parent
commit
d781de5b82

+ 5 - 15
Pal/src/host/Linux-SGX/db_eventfd.c

@@ -73,7 +73,7 @@ static int eventfd_pal_open(PAL_HANDLE* handle, const char* type, const char* ur
     SET_HANDLE_TYPE(hdl, eventfd);
 
     /* Note: using index 0, given that there is only 1 eventfd FD per pal-handle. */
-    HANDLE_HDR(hdl)->flags = RFD(0) | WFD(0) | WRITABLE(0);
+    HANDLE_HDR(hdl)->flags = RFD(0) | WFD(0);
 
     hdl->eventfd.fd          = ret;
     hdl->eventfd.nonblocking = (options & PAL_OPTION_NONBLOCK) ? PAL_TRUE : PAL_FALSE;
@@ -117,18 +117,8 @@ static int64_t eventfd_pal_write(PAL_HANDLE handle, uint64_t offset, uint64_t le
         return -PAL_ERROR_INVAL;
 
     int bytes = ocall_write(handle->eventfd.fd, buffer, len);
-    PAL_FLG writable = WRITABLE(0);
-
-    if (IS_ERR(bytes)) {
-        if (ERRNO(bytes) == EAGAIN)
-            HANDLE_HDR(handle)->flags &= ~writable;
+    if (IS_ERR(bytes))
         return unix_to_pal_error(ERRNO(bytes));
-    }
-
-    if ((uint64_t)bytes == sizeof(uint64_t))
-        HANDLE_HDR(handle)->flags |= writable;
-    else
-        HANDLE_HDR(handle)->flags &= ~writable;
 
     return bytes;
 }
@@ -143,7 +133,6 @@ static int eventfd_pal_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr)
     attr->handle_type  = HANDLE_HDR(handle)->type;
     attr->nonblocking  = handle->eventfd.nonblocking;
     attr->disconnected = HANDLE_HDR(handle)->flags & ERROR(0);
-    attr->writable     = HANDLE_HDR(handle)->flags & WRITABLE(0);
 
     /* get number of bytes available for reading */
     ret = ocall_fionread(handle->eventfd.fd);
@@ -153,12 +142,13 @@ static int eventfd_pal_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr)
     attr->pending_size = ret;
 
     /* query if there is data available for reading */
-    struct pollfd pfd = {.fd = handle->eventfd.fd, .events = POLLIN, .revents = 0};
+    struct pollfd pfd = {.fd = handle->eventfd.fd, .events = POLLIN | POLLOUT, .revents = 0};
     ret = ocall_poll(&pfd, 1, 0);
     if (IS_ERR(ret))
         return unix_to_pal_error(ERRNO(ret));
 
-    attr->readable = (ret == 1 && pfd.revents == POLLIN);
+    attr->readable = ret == 1 && (pfd.revents & (POLLIN | POLLERR | POLLHUP)) == POLLIN;
+    attr->writable = ret == 1 && (pfd.revents & (POLLOUT | POLLERR | POLLHUP)) == POLLOUT;
 
     /* For future use, so that Linux host kernel can send notifications to user-space apps. App
      * receives virtual FD from LibOS, but the Linux-host eventfd is memorized here, such that this

+ 1 - 1
Pal/src/host/Linux-SGX/db_files.c

@@ -57,7 +57,7 @@ static int file_open(PAL_HANDLE* handle, const char* type, const char* uri, int
     size_t len     = strlen(uri) + 1;
     PAL_HANDLE hdl = malloc(HANDLE_SIZE(file) + len);
     SET_HANDLE_TYPE(hdl, file);
-    HANDLE_HDR(hdl)->flags |= RFD(0) | WFD(0) | WRITABLE(0);
+    HANDLE_HDR(hdl)->flags |= RFD(0) | WFD(0);
     hdl->file.fd     = fd;
     char* path       = (void*)hdl + HANDLE_SIZE(file);
     int ret;

+ 0 - 2
Pal/src/host/Linux-SGX/db_object.c

@@ -146,8 +146,6 @@ int _DkStreamsWaitEvents(size_t count, PAL_HANDLE* handle_array, PAL_FLG* events
         for (size_t k = 0; k < MAX_FDS; k++) {
             if (hdl->generic.fds[k] != (PAL_IDX)fds[i].fd)
                 continue;
-            if (fds[i].revents & POLLOUT)
-                HANDLE_HDR(hdl)->flags |= WRITABLE(k);
             if (fds[i].revents & (POLLHUP|POLLERR|POLLNVAL))
                 HANDLE_HDR(hdl)->flags |= ERROR(k);
         }

+ 35 - 29
Pal/src/host/Linux-SGX/db_pipes.c

@@ -94,7 +94,7 @@ static int pipe_waitforclient(PAL_HANDLE handle, PAL_HANDLE* client) {
 
     PAL_HANDLE clnt = malloc(HANDLE_SIZE(pipe));
     SET_HANDLE_TYPE(clnt, pipecli);
-    HANDLE_HDR(clnt)->flags |= RFD(0) | WFD(0) | WRITABLE(0);
+    HANDLE_HDR(clnt)->flags |= RFD(0) | WFD(0);
     clnt->pipe.fd          = ret;
     clnt->pipe.nonblocking = PAL_FALSE;
     clnt->pipe.pipeid      = handle->pipe.pipeid;
@@ -118,7 +118,7 @@ static int pipe_connect(PAL_HANDLE* handle, PAL_NUM pipeid, int options) {
 
     PAL_HANDLE hdl = malloc(HANDLE_SIZE(pipe));
     SET_HANDLE_TYPE(hdl, pipe);
-    HANDLE_HDR(hdl)->flags |= RFD(0) | WFD(0) | WRITABLE(0);
+    HANDLE_HDR(hdl)->flags |= RFD(0) | WFD(0);
     hdl->pipe.fd          = ret;
     hdl->pipe.pipeid      = pipeid;
     hdl->pipe.nonblocking = (options & PAL_OPTION_NONBLOCK) ? PAL_TRUE : PAL_FALSE;
@@ -139,7 +139,7 @@ static int pipe_private(PAL_HANDLE* handle, int options) {
 
     PAL_HANDLE hdl = malloc(HANDLE_SIZE(pipeprv));
     SET_HANDLE_TYPE(hdl, pipeprv);
-    HANDLE_HDR(hdl)->flags |= RFD(0) | WFD(1) | WRITABLE(1);
+    HANDLE_HDR(hdl)->flags |= RFD(0) | WFD(1);
     hdl->pipeprv.fds[0]      = fds[0];
     hdl->pipeprv.fds[1]      = fds[1];
     hdl->pipeprv.nonblocking = (options & PAL_OPTION_NONBLOCK) ? PAL_TRUE : PAL_FALSE;
@@ -211,22 +211,11 @@ static int64_t pipe_write(PAL_HANDLE handle, uint64_t offset, uint64_t len, cons
     if (len >= (1ULL << (sizeof(unsigned int) * 8)))
         return -PAL_ERROR_INVAL;
 
-    int fd    = IS_HANDLE_TYPE(handle, pipeprv) ? handle->pipeprv.fds[1] : handle->pipe.fd;
-    int bytes = ocall_send(fd, buffer, len, NULL, 0, NULL, 0);
-
-    PAL_FLG writable = IS_HANDLE_TYPE(handle, pipeprv) ? WRITABLE(1) : WRITABLE(0);
+    int fd = IS_HANDLE_TYPE(handle, pipeprv) ? handle->pipeprv.fds[1] : handle->pipe.fd;
 
-    if (IS_ERR(bytes)) {
-        bytes = unix_to_pal_error(ERRNO(bytes));
-        if (bytes == -PAL_ERROR_TRYAGAIN)
-            HANDLE_HDR(handle)->flags &= ~writable;
-        return bytes;
-    }
-
-    if ((uint64_t)bytes == len)
-        HANDLE_HDR(handle)->flags |= writable;
-    else
-        HANDLE_HDR(handle)->flags &= ~writable;
+    int bytes = ocall_send(fd, buffer, len, NULL, 0, NULL, 0);
+    if (IS_ERR(bytes))
+        return unix_to_pal_error(ERRNO(bytes));
 
     return bytes;
 }
@@ -309,34 +298,51 @@ static int pipe_delete(PAL_HANDLE handle, int access) {
 static int pipe_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr) {
     int ret;
 
-    if (handle->generic.fds[0] == PAL_IDX_POISON)
+    if (handle->pipe.fd == PAL_IDX_POISON)
         return -PAL_ERROR_BADHANDLE;
 
     attr->handle_type  = HANDLE_HDR(handle)->type;
     attr->nonblocking  = IS_HANDLE_TYPE(handle, pipeprv) ? handle->pipeprv.nonblocking
                                                          : handle->pipe.nonblocking;
     attr->disconnected = HANDLE_HDR(handle)->flags & ERROR(0);
-    attr->writable     = PAL_FALSE;
 
     /* get number of bytes available for reading (doesn't make sense for "listening" pipes) */
     attr->pending_size = 0;
     if (!IS_HANDLE_TYPE(handle, pipesrv)) {
-        ret = ocall_fionread(handle->generic.fds[0]);
+        ret = ocall_fionread(handle->pipe.fd);
         if (IS_ERR(ret))
             return unix_to_pal_error(ERRNO(ret));
 
         attr->pending_size = ret;
-        attr->writable     = HANDLE_HDR(handle)->flags & (IS_HANDLE_TYPE(handle, pipeprv)
-                                                              ? WRITABLE(1) : WRITABLE(0));
     }
 
-    /* query if there is data available for reading */
-    struct pollfd pfd = {.fd = handle->generic.fds[0], .events = POLLIN, .revents = 0};
-    ret = ocall_poll(&pfd, 1, 0);
-    if (IS_ERR(ret))
-        return unix_to_pal_error(ERRNO(ret));
+    /* query if there is data available for reading/writing */
+    if (IS_HANDLE_TYPE(handle, pipeprv)) {
+        /* for private pipe, readable and writable are queried on different fds */
+        struct pollfd pfd[2] = {{.fd = handle->pipeprv.fds[0], .events = POLLIN,  .revents = 0},
+                                {.fd = handle->pipeprv.fds[1], .events = POLLOUT, .revents = 0}};
+        ret = ocall_poll(&pfd[0], 2, 0);
+        if (IS_ERR(ret))
+            return unix_to_pal_error(ERRNO(ret));
+
+        attr->readable = ret >= 1 && (pfd[0].revents & (POLLIN | POLLERR | POLLHUP)) == POLLIN;
+        attr->writable = ret >= 1 && (pfd[1].revents & (POLLOUT | POLLERR | POLLHUP)) == POLLOUT;
+    } else {
+        /* for non-private pipes, both readable and writable are queried on the same fd */
+        short pfd_events = POLLIN;
+        if (!IS_HANDLE_TYPE(handle, pipesrv)) {
+            /* querying for writing doesn't make sense for "listening" pipes */
+            pfd_events |= POLLOUT;
+        }
 
-    attr->readable = (ret == 1 && pfd.revents == POLLIN);
+        struct pollfd pfd = {.fd = handle->pipe.fd, .events = pfd_events, .revents = 0};
+        ret = ocall_poll(&pfd, 1, 0);
+        if (IS_ERR(ret))
+            return unix_to_pal_error(ERRNO(ret));
+
+        attr->readable = ret == 1 && (pfd.revents & (POLLIN | POLLERR | POLLHUP)) == POLLIN;
+        attr->writable = ret == 1 && (pfd.revents & (POLLOUT | POLLERR | POLLHUP)) == POLLOUT;
+    }
 
     return 0;
 }

+ 7 - 18
Pal/src/host/Linux-SGX/db_process.c

@@ -267,7 +267,7 @@ int _DkProcessCreate (PAL_HANDLE * handle, const char * uri, const char ** args)
 
     PAL_HANDLE child = malloc(HANDLE_SIZE(process));
     SET_HANDLE_TYPE(child, process);
-    HANDLE_HDR(child)->flags |= RFD(0)|WFD(0)|RFD(1)|WFD(1)|WRITABLE(0)|WRITABLE(1);
+    HANDLE_HDR(child)->flags |= RFD(0)|WFD(0)|RFD(1)|WFD(1);
     child->process.stream      = stream_fd;
     child->process.cargo       = cargo_fd;
     child->process.pid         = child_pid;
@@ -315,7 +315,7 @@ int init_child_process (PAL_HANDLE * parent_handle)
 {
     PAL_HANDLE parent = malloc(HANDLE_SIZE(process));
     SET_HANDLE_TYPE(parent, process);
-    HANDLE_HDR(parent)->flags |= RFD(0)|WFD(0)|RFD(1)|WFD(1)|WRITABLE(0)|WRITABLE(1);
+    HANDLE_HDR(parent)->flags |= RFD(0)|WFD(0)|RFD(1)|WFD(1);
 
     parent->process.stream     = pal_sec.stream_fd;
     parent->process.cargo      = pal_sec.cargo_fd;
@@ -378,18 +378,8 @@ static int64_t proc_write (PAL_HANDLE handle, uint64_t offset, uint64_t count,
         return -PAL_ERROR_INVAL;
 
     int bytes = ocall_write(handle->process.stream, buffer, count);
-
-    if (IS_ERR(bytes)) {
-        bytes = unix_to_pal_error(ERRNO(bytes));
-        if (bytes == -PAL_ERROR_TRYAGAIN)
-            HANDLE_HDR(handle)->flags &= ~WRITABLE(0);
-        return bytes;
-    }
-
-    if ((uint64_t)bytes == count)
-        HANDLE_HDR(handle)->flags |= WRITABLE(0);
-    else
-        HANDLE_HDR(handle)->flags &= ~WRITABLE(0);
+    if (IS_ERR(bytes))
+        return unix_to_pal_error(ERRNO(bytes));
 
     return bytes;
 }
@@ -444,7 +434,6 @@ static int proc_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr) {
     attr->handle_type  = HANDLE_HDR(handle)->type;
     attr->nonblocking  = handle->process.nonblocking;
     attr->disconnected = HANDLE_HDR(handle)->flags & ERROR(0);
-    attr->writable     = HANDLE_HDR(handle)->flags & WRITABLE(0);
 
     /* get number of bytes available for reading */
     ret = ocall_fionread(handle->process.stream);
@@ -454,13 +443,13 @@ static int proc_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr) {
     attr->pending_size = ret;
 
     /* query if there is data available for reading */
-    struct pollfd pfd = {.fd = handle->process.stream, .events = POLLIN, .revents = 0};
+    struct pollfd pfd = {.fd = handle->process.stream, .events = POLLIN | POLLOUT, .revents = 0};
     ret = ocall_poll(&pfd, 1, 0);
     if (IS_ERR(ret))
         return unix_to_pal_error(ERRNO(ret));
 
-    attr->readable = (ret == 1 && pfd.revents == POLLIN);
-
+    attr->readable = ret == 1 && (pfd.revents & (POLLIN | POLLERR | POLLHUP)) == POLLIN;
+    attr->writable = ret == 1 && (pfd.revents & (POLLOUT | POLLERR | POLLHUP)) == POLLOUT;
     return 0;
 }
 

+ 14 - 59
Pal/src/host/Linux-SGX/db_sockets.c

@@ -496,18 +496,8 @@ static int64_t tcp_write(PAL_HANDLE handle, uint64_t offset, uint64_t len, const
         return -PAL_ERROR_INVAL;
 
     int bytes = ocall_send(handle->sock.fd, buf, len, NULL, 0, NULL, 0);
-
-    if (IS_ERR(bytes)) {
-        bytes = unix_to_pal_error(ERRNO(bytes));
-        if (bytes == -PAL_ERROR_TRYAGAIN)
-            HANDLE_HDR(handle)->flags &= ~WRITABLE(0);
-        return bytes;
-    }
-
-    if ((uint64_t)bytes == len)
-        HANDLE_HDR(handle)->flags |= WRITABLE(0);
-    else
-        HANDLE_HDR(handle)->flags &= ~WRITABLE(0);
+    if (IS_ERR(bytes))
+        return unix_to_pal_error(ERRNO(bytes));
 
     return bytes;
 }
@@ -681,18 +671,8 @@ static int64_t udp_send(PAL_HANDLE handle, uint64_t offset, uint64_t len, const
         return -PAL_ERROR_INVAL;
 
     int bytes = ocall_send(handle->sock.fd, buf, len, NULL, 0, NULL, 0);
-
-    if (IS_ERR(bytes)) {
-        bytes = unix_to_pal_error(ERRNO(bytes));
-        if (bytes == -PAL_ERROR_TRYAGAIN)
-            HANDLE_HDR(handle)->flags &= ~WRITABLE(0);
-        return bytes;
-    }
-
-    if ((uint64_t)bytes == len)
-        HANDLE_HDR(handle)->flags |= WRITABLE(0);
-    else
-        HANDLE_HDR(handle)->flags &= ~WRITABLE(0);
+    if (IS_ERR(bytes))
+        return unix_to_pal_error(ERRNO(bytes));
 
     return bytes;
 }
@@ -728,18 +708,8 @@ static int64_t udp_sendbyaddr(PAL_HANDLE handle, uint64_t offset, uint64_t len,
         return ret;
 
     int bytes = ocall_send(handle->sock.fd, buf, len, &conn_addr, conn_addrlen, NULL, 0);
-
-    if (IS_ERR(bytes)) {
-        bytes = unix_to_pal_error(ERRNO(bytes));
-        if (bytes == -PAL_ERROR_TRYAGAIN)
-            HANDLE_HDR(handle)->flags &= ~WRITABLE(0);
-        return bytes;
-    }
-
-    if ((uint64_t)bytes == len)
-        HANDLE_HDR(handle)->flags |= WRITABLE(0);
-    else
-        HANDLE_HDR(handle)->flags &= ~WRITABLE(0);
+    if (IS_ERR(bytes))
+        return unix_to_pal_error(ERRNO(bytes));
 
     return bytes;
 }
@@ -797,7 +767,6 @@ static int socket_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr) {
     attr->handle_type           = HANDLE_HDR(handle)->type;
     attr->nonblocking           = handle->sock.nonblocking;
     attr->disconnected          = HANDLE_HDR(handle)->flags & ERROR(0);
-    attr->writable              = HANDLE_HDR(handle)->flags & WRITABLE(0);
 
     attr->socket.linger         = handle->sock.linger;
     attr->socket.receivebuf     = handle->sock.receivebuf;
@@ -819,13 +788,13 @@ static int socket_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr) {
     }
 
     /* query if there is data available for reading */
-    struct pollfd pfd = {.fd = handle->sock.fd, .events = POLLIN, .revents = 0};
+    struct pollfd pfd = {.fd = handle->sock.fd, .events = POLLIN | POLLOUT, .revents = 0};
     ret = ocall_poll(&pfd, 1, 0);
     if (IS_ERR(ret))
         return unix_to_pal_error(ERRNO(ret));
 
-    attr->readable = (ret == 1 && pfd.revents == POLLIN);
-
+    attr->readable = ret == 1 && (pfd.revents & (POLLIN | POLLERR | POLLHUP)) == POLLIN;
+    attr->writable = ret == 1 && (pfd.revents & (POLLOUT | POLLERR | POLLHUP)) == POLLOUT;
     return 0;
 }
 
@@ -1042,7 +1011,7 @@ struct handle_ops udpsrv_ops = {
 PAL_HANDLE _DkBroadcastStreamOpen(void) {
     PAL_HANDLE hdl = malloc(HANDLE_SIZE(file));
     SET_HANDLE_TYPE(hdl, mcast);
-    HANDLE_HDR(hdl)->flags |= RFD(0) | WFD(1) | WRITABLE(1);
+    HANDLE_HDR(hdl)->flags |= RFD(0) | WFD(1);
     hdl->mcast.port = pal_sec.mcast_port;
     hdl->mcast.srv  = pal_sec.mcast_srv;
     hdl->mcast.cli  = pal_sec.mcast_cli;
@@ -1060,18 +1029,8 @@ static int64_t mcast_send(PAL_HANDLE handle, uint64_t offset, uint64_t size, con
         return -PAL_ERROR_INVAL;
 
     int bytes = ocall_send(handle->mcast.srv, buf, size, NULL, 0, NULL, 0);
-
-    if (IS_ERR(bytes)) {
-        bytes = unix_to_pal_error(ERRNO(bytes));
-        if (bytes == -PAL_ERROR_TRYAGAIN)
-            HANDLE_HDR(handle)->flags &= ~WRITABLE(1);
-        return bytes;
-    }
-
-    if ((uint64_t)bytes == size)
-        HANDLE_HDR(handle)->flags |= WRITABLE(1);
-    else
-        HANDLE_HDR(handle)->flags &= ~WRITABLE(1);
+    if (IS_ERR(bytes))
+        return unix_to_pal_error(ERRNO(bytes));
 
     return bytes;
 }
@@ -1087,12 +1046,8 @@ static int64_t mcast_receive(PAL_HANDLE handle, uint64_t offset, uint64_t size,
         return -PAL_ERROR_INVAL;
 
     int bytes = ocall_recv(handle->mcast.cli, buf, size, NULL, NULL, NULL, NULL);
-
     if (IS_ERR(bytes))
-        bytes = unix_to_pal_error(ERRNO(bytes));
-
-    if (bytes == -PAL_ERROR_TRYAGAIN)
-        HANDLE_HDR(handle)->flags &= ~WRITABLE(1);
+        return unix_to_pal_error(ERRNO(bytes));
 
     return bytes;
 }
@@ -1121,7 +1076,7 @@ static int mcast_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr) {
     attr->pending_size = ret;
     attr->disconnected = HANDLE_HDR(handle)->flags & (ERROR(0) | ERROR(1));
     attr->readable     = (attr->pending_size > 0);
-    attr->writable     = HANDLE_HDR(handle)->flags & WRITABLE(1);
+    attr->writable     = PAL_TRUE;
     attr->nonblocking  = handle->mcast.nonblocking;
     return 0;
 }

+ 1 - 3
Pal/src/host/Linux-SGX/pal_host.h

@@ -179,9 +179,7 @@ typedef struct pal_handle
 
 #define RFD(n)          (1 << (MAX_FDS*0 + (n)))
 #define WFD(n)          (1 << (MAX_FDS*1 + (n)))
-#define WRITABLE(n)     (1 << (MAX_FDS*2 + (n)))
-#define ERROR(n)        (1 << (MAX_FDS*3 + (n)))
-#define HAS_FDS         ((1 << MAX_FDS*2) - 1)
+#define ERROR(n)        (1 << (MAX_FDS*2 + (n)))
 
 #define HANDLE_TYPE(handle)  ((handle)->hdr.type)
 

+ 6 - 20
Pal/src/host/Linux/db_eventfd.c

@@ -38,10 +38,6 @@
 #include "pal_linux_error.h"
 #include "pal_security.h"
 
-#ifndef FIONREAD
-#define FIONREAD 0x541B
-#endif
-
 static inline int eventfd_type(int options) {
     int type = 0;
     if (options & PAL_OPTION_NONBLOCK)
@@ -79,7 +75,7 @@ static int eventfd_pal_open(PAL_HANDLE* handle, const char* type, const char* ur
     SET_HANDLE_TYPE(hdl, eventfd);
 
     /* Note: using index 0, given that there is only 1 eventfd FD per pal-handle. */
-    HANDLE_HDR(hdl)->flags = RFD(0) | WFD(0) | WRITABLE(0);
+    HANDLE_HDR(hdl)->flags = RFD(0) | WFD(0);
 
     hdl->eventfd.fd          = ret;
     hdl->eventfd.nonblocking = (options & PAL_OPTION_NONBLOCK) ? PAL_TRUE : PAL_FALSE;
@@ -120,19 +116,9 @@ static int64_t eventfd_pal_write(PAL_HANDLE handle, uint64_t offset, uint64_t le
     if (len < sizeof(uint64_t))
         return -PAL_ERROR_INVAL;
 
-    int bytes        = INLINE_SYSCALL(write, 3, handle->eventfd.fd, buffer, len);
-    PAL_FLG writable = WRITABLE(0);
-
-    if (IS_ERR(bytes)) {
-        if (ERRNO(bytes) == EAGAIN)
-            HANDLE_HDR(handle)->flags &= ~writable;
+    int bytes = INLINE_SYSCALL(write, 3, handle->eventfd.fd, buffer, len);
+    if (IS_ERR(bytes))
         return unix_to_pal_error(ERRNO(bytes));
-    }
-
-    if ((uint64_t)bytes == sizeof(uint64_t))
-        HANDLE_HDR(handle)->flags |= writable;
-    else
-        HANDLE_HDR(handle)->flags &= ~writable;
 
     return bytes;
 }
@@ -147,7 +133,6 @@ static int eventfd_pal_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr)
     attr->handle_type  = HANDLE_HDR(handle)->type;
     attr->nonblocking  = handle->eventfd.nonblocking;
     attr->disconnected = HANDLE_HDR(handle)->flags & ERROR(0);
-    attr->writable     = HANDLE_HDR(handle)->flags & WRITABLE(0);
 
     /* get number of bytes available for reading */
     ret = INLINE_SYSCALL(ioctl, 3, handle->eventfd.fd, FIONREAD, &val);
@@ -157,13 +142,14 @@ static int eventfd_pal_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr)
     attr->pending_size = val;
 
     /* query if there is data available for reading */
-    struct pollfd pfd  = {.fd = handle->eventfd.fd, .events = POLLIN, .revents = 0};
+    struct pollfd pfd  = {.fd = handle->eventfd.fd, .events = POLLIN | POLLOUT, .revents = 0};
     struct timespec tp = {0, 0};
     ret = INLINE_SYSCALL(ppoll, 5, &pfd, 1, &tp, NULL, 0);
     if (IS_ERR(ret))
         return unix_to_pal_error(ERRNO(ret));
 
-    attr->readable = (ret == 1 && pfd.revents == POLLIN);
+    attr->readable = ret == 1 && (pfd.revents & (POLLIN | POLLERR | POLLHUP)) == POLLIN;
+    attr->writable = ret == 1 && (pfd.revents & (POLLOUT | POLLERR | POLLHUP)) == POLLOUT;
 
     /* For future use, so that Linux host kernel can send notifications to user-space apps. App
      * receives virtual FD from LibOS, but the Linux-host eventfd is memorized here, such that this

+ 1 - 1
Pal/src/host/Linux/db_files.c

@@ -56,7 +56,7 @@ static int file_open (PAL_HANDLE * handle, const char * type, const char * uri,
     size_t len = strlen(uri);
     PAL_HANDLE hdl = malloc(HANDLE_SIZE(file) + len + 1);
     SET_HANDLE_TYPE(hdl, file);
-    HANDLE_HDR(hdl)->flags |= RFD(0)|WFD(0)|WRITABLE(0);
+    HANDLE_HDR(hdl)->flags |= RFD(0)|WFD(0);
     hdl->file.fd = ret;
     hdl->file.offset = 0;
     hdl->file.map_start = NULL;

+ 1 - 1
Pal/src/host/Linux/db_main.c

@@ -289,7 +289,7 @@ void pal_linux_main (void * args)
     size_t len = strlen(argv[0]) + 1;
     PAL_HANDLE file = malloc(HANDLE_SIZE(file) + len);
     SET_HANDLE_TYPE(file, file);
-    HANDLE_HDR(file)->flags |= RFD(0)|WFD(0)|WRITABLE(0);
+    HANDLE_HDR(file)->flags |= RFD(0)|WFD(0);
     file->file.fd = fd;
     file->file.offset = 0;
     file->file.map_start = NULL;

+ 0 - 2
Pal/src/host/Linux/db_object.c

@@ -155,8 +155,6 @@ int _DkStreamsWaitEvents(size_t count, PAL_HANDLE* handle_array, PAL_FLG* events
         for (size_t k = 0; k < MAX_FDS; k++) {
             if (hdl->generic.fds[k] != (PAL_IDX)fds[i].fd)
                 continue;
-            if (fds[i].revents & POLLOUT)
-                HANDLE_HDR(hdl)->flags |= WRITABLE(k);
             if (fds[i].revents & (POLLHUP|POLLERR|POLLNVAL))
                 HANDLE_HDR(hdl)->flags |= ERROR(k);
         }

+ 33 - 27
Pal/src/host/Linux/db_pipes.c

@@ -40,10 +40,6 @@ typedef __kernel_pid_t pid_t;
 #include <linux/un.h>
 #include <sys/socket.h>
 
-#ifndef FIONREAD
-#define FIONREAD 0x541B
-#endif
-
 static int pipe_path(int pipeid, char* path, int len) {
     /* use abstract UNIX sockets for pipes */
     memset(path, 0, len);
@@ -122,7 +118,7 @@ static int pipe_waitforclient(PAL_HANDLE handle, PAL_HANDLE* client) {
 
     PAL_HANDLE clnt = malloc(HANDLE_SIZE(pipe));
     SET_HANDLE_TYPE(clnt, pipecli);
-    HANDLE_HDR(clnt)->flags |= RFD(0) | WFD(0) | WRITABLE(0);
+    HANDLE_HDR(clnt)->flags |= RFD(0) | WFD(0);
     clnt->pipe.fd          = newfd;
     clnt->pipe.pipeid      = handle->pipe.pipeid;
     clnt->pipe.nonblocking = PAL_FALSE;
@@ -160,7 +156,7 @@ static int pipe_connect(PAL_HANDLE* handle, PAL_NUM pipeid, int options) {
 
     PAL_HANDLE hdl = malloc(HANDLE_SIZE(pipe));
     SET_HANDLE_TYPE(hdl, pipe);
-    HANDLE_HDR(hdl)->flags |= RFD(0) | WFD(0) | WRITABLE(0);
+    HANDLE_HDR(hdl)->flags |= RFD(0) | WFD(0);
     hdl->pipe.fd          = fd;
     hdl->pipe.pipeid      = pipeid;
     hdl->pipe.nonblocking = (options & PAL_OPTION_NONBLOCK) ? PAL_TRUE : PAL_FALSE;
@@ -178,7 +174,7 @@ static int pipe_private(PAL_HANDLE* handle, int options) {
 
     PAL_HANDLE hdl = malloc(HANDLE_SIZE(pipeprv));
     SET_HANDLE_TYPE(hdl, pipeprv);
-    HANDLE_HDR(hdl)->flags |= RFD(0) | WFD(1) | WRITABLE(1);
+    HANDLE_HDR(hdl)->flags |= RFD(0) | WFD(1);
     hdl->pipeprv.fds[0]      = fds[0];
     hdl->pipeprv.fds[1]      = fds[1];
     hdl->pipeprv.nonblocking = (options & PAL_OPTION_NONBLOCK) ? PAL_TRUE : PAL_FALSE;
@@ -250,14 +246,6 @@ static int64_t pipe_write(PAL_HANDLE handle, uint64_t offset, size_t len, const
     int64_t bytes = 0;
 
     bytes = INLINE_SYSCALL(write, 3, fd, buffer, len);
-
-    PAL_FLG writable = IS_HANDLE_TYPE(handle, pipeprv) ? WRITABLE(1) : WRITABLE(0);
-
-    if (!IS_ERR(bytes) && (size_t)bytes == len)
-        HANDLE_HDR(handle)->flags |= writable;
-    else
-        HANDLE_HDR(handle)->flags &= ~writable;
-
     if (IS_ERR(bytes))
         bytes = unix_to_pal_error(ERRNO(bytes));
 
@@ -344,35 +332,53 @@ static int pipe_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr) {
     int ret;
     int val;
 
-    if (handle->generic.fds[0] == PAL_IDX_POISON)
+    if (handle->pipe.fd == PAL_IDX_POISON)
         return -PAL_ERROR_BADHANDLE;
 
     attr->handle_type  = HANDLE_HDR(handle)->type;
     attr->nonblocking  = IS_HANDLE_TYPE(handle, pipeprv) ? handle->pipeprv.nonblocking
                                                          : handle->pipe.nonblocking;
     attr->disconnected = HANDLE_HDR(handle)->flags & ERROR(0);
-    attr->writable     = PAL_FALSE;
 
     /* get number of bytes available for reading (doesn't make sense for "listening" pipes) */
     attr->pending_size = 0;
     if (!IS_HANDLE_TYPE(handle, pipesrv)) {
-        ret = INLINE_SYSCALL(ioctl, 3, handle->generic.fds[0], FIONREAD, &val);
+        ret = INLINE_SYSCALL(ioctl, 3, handle->pipe.fd, FIONREAD, &val);
         if (IS_ERR(ret))
             return unix_to_pal_error(ERRNO(ret));
 
         attr->pending_size = val;
-        attr->writable     = HANDLE_HDR(handle)->flags & (IS_HANDLE_TYPE(handle, pipeprv)
-                                                              ? WRITABLE(1) : WRITABLE(0));
     }
 
-    /* query if there is data available for reading */
-    struct pollfd pfd  = {.fd = handle->generic.fds[0], .events = POLLIN, .revents = 0};
-    struct timespec tp = {0, 0};
-    ret = INLINE_SYSCALL(ppoll, 5, &pfd, 1, &tp, NULL, 0);
-    if (IS_ERR(ret))
-        return unix_to_pal_error(ERRNO(ret));
+    /* query if there is data available for reading/writing */
+    if (IS_HANDLE_TYPE(handle, pipeprv)) {
+        /* for private pipe, readable and writable are queried on different fds */
+        struct pollfd pfd[2] = {{.fd = handle->pipeprv.fds[0], .events = POLLIN,  .revents = 0},
+                                {.fd = handle->pipeprv.fds[1], .events = POLLOUT, .revents = 0}};
+        struct timespec tp   = {0, 0};
+        ret = INLINE_SYSCALL(ppoll, 5, &pfd, 2, &tp, NULL, 0);
+        if (IS_ERR(ret))
+            return unix_to_pal_error(ERRNO(ret));
+
+        attr->readable = ret >= 1 && (pfd[0].revents & (POLLIN | POLLERR | POLLHUP)) == POLLIN;
+        attr->writable = ret >= 1 && (pfd[1].revents & (POLLOUT | POLLERR | POLLHUP)) == POLLOUT;
+    } else {
+        /* for non-private pipes, both readable and writable are queried on the same fd */
+        short pfd_events = POLLIN;
+        if (!IS_HANDLE_TYPE(handle, pipesrv)) {
+            /* querying for writing doesn't make sense for "listening" pipes */
+            pfd_events |= POLLOUT;
+        }
 
-    attr->readable = (ret == 1 && pfd.revents == POLLIN);
+        struct pollfd pfd  = {.fd = handle->pipe.fd, .events = pfd_events, .revents = 0};
+        struct timespec tp = {0, 0};
+        ret = INLINE_SYSCALL(ppoll, 5, &pfd, 1, &tp, NULL, 0);
+        if (IS_ERR(ret))
+            return unix_to_pal_error(ERRNO(ret));
+
+        attr->readable = ret == 1 && (pfd.revents & (POLLIN | POLLERR | POLLHUP)) == POLLIN;
+        attr->writable = ret == 1 && (pfd.revents & (POLLOUT | POLLERR | POLLHUP)) == POLLOUT;
+    }
 
     return 0;
 }

+ 5 - 16
Pal/src/host/Linux/db_process.c

@@ -48,10 +48,6 @@ typedef __kernel_pid_t pid_t;
 # define SEEK_SET 0
 #endif
 
-#ifndef FIONREAD
-# define FIONREAD 0x541B
-#endif
-
 static inline int create_process_handle (PAL_HANDLE * parent,
                                          PAL_HANDLE * child)
 {
@@ -73,7 +69,7 @@ static inline int create_process_handle (PAL_HANDLE * parent,
     }
 
     SET_HANDLE_TYPE(phdl, process);
-    HANDLE_HDR(phdl)->flags |= RFD(0)|WFD(0)|RFD(1)|WFD(1)|WRITABLE(0)|WRITABLE(1);
+    HANDLE_HDR(phdl)->flags |= RFD(0)|WFD(0)|RFD(1)|WFD(1);
     phdl->process.stream      = fds[0];
     phdl->process.cargo       = fds[2];
     phdl->process.pid         = linux_state.pid;
@@ -86,7 +82,7 @@ static inline int create_process_handle (PAL_HANDLE * parent,
     }
 
     SET_HANDLE_TYPE(chdl, process);
-    HANDLE_HDR(chdl)->flags |= RFD(0)|WFD(0)|RFD(1)|WFD(1)|WRITABLE(0)|WRITABLE(1);
+    HANDLE_HDR(chdl)->flags |= RFD(0)|WFD(0)|RFD(1)|WFD(1);
     chdl->process.stream      = fds[1];
     chdl->process.cargo       = fds[3];
     chdl->process.pid         = 0; /* unknown yet */
@@ -468,7 +464,6 @@ static int64_t proc_write (PAL_HANDLE handle, uint64_t offset, uint64_t count,
     if (IS_ERR(bytes))
         switch(ERRNO(bytes)) {
             case EWOULDBLOCK:
-                HANDLE_HDR(handle)->flags &= ~WRITABLE(0);
                 return -PAL_ERROR_TRYAGAIN;
             case EINTR:
                 return -PAL_ERROR_INTERRUPTED;
@@ -477,11 +472,6 @@ static int64_t proc_write (PAL_HANDLE handle, uint64_t offset, uint64_t count,
         }
 
     assert(!IS_ERR(bytes));
-    if ((size_t)bytes == count)
-        HANDLE_HDR(handle)->flags |= WRITABLE(0);
-    else
-        HANDLE_HDR(handle)->flags &= ~WRITABLE(0);
-
     return bytes;
 }
 
@@ -536,7 +526,6 @@ static int proc_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr) {
     attr->handle_type  = HANDLE_HDR(handle)->type;
     attr->nonblocking  = handle->process.nonblocking;
     attr->disconnected = HANDLE_HDR(handle)->flags & ERROR(0);
-    attr->writable     = HANDLE_HDR(handle)->flags & WRITABLE(0);
 
     /* get number of bytes available for reading */
     ret = INLINE_SYSCALL(ioctl, 3, handle->process.stream, FIONREAD, &val);
@@ -546,14 +535,14 @@ static int proc_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr) {
     attr->pending_size = val;
 
     /* query if there is data available for reading */
-    struct pollfd pfd  = {.fd = handle->process.stream, .events = POLLIN, .revents = 0};
+    struct pollfd pfd  = {.fd = handle->process.stream, .events = POLLIN | POLLOUT, .revents = 0};
     struct timespec tp = {0, 0};
     ret = INLINE_SYSCALL(ppoll, 5, &pfd, 1, &tp, NULL, 0);
     if (IS_ERR(ret))
         return unix_to_pal_error(ERRNO(ret));
 
-    attr->readable = (ret == 1 && pfd.revents == POLLIN);
-
+    attr->readable = ret == 1 && (pfd.revents & (POLLIN | POLLERR | POLLHUP)) == POLLIN;
+    attr->writable = ret == 1 && (pfd.revents & (POLLOUT | POLLERR | POLLHUP)) == POLLOUT;
     return 0;
 }
 

+ 5 - 36
Pal/src/host/Linux/db_sockets.c

@@ -58,10 +58,6 @@ typedef __kernel_pid_t pid_t;
 #define SOL_IPV6 41
 #endif
 
-#ifndef FIONREAD
-#define FIONREAD 0x541B
-#endif
-
 /* 96 bytes is the minimal size of buffer to store a IPv4/IPv6
    address */
 #define PAL_SOCKADDR_SIZE 96
@@ -608,12 +604,6 @@ static int64_t tcp_write(PAL_HANDLE handle, uint64_t offset, size_t len, const v
     hdr.msg_flags      = 0;
 
     int64_t bytes = INLINE_SYSCALL(sendmsg, 3, handle->sock.fd, &hdr, MSG_NOSIGNAL);
-
-    if (!IS_ERR(bytes) && (size_t)bytes == len)
-        HANDLE_HDR(handle)->flags |= WRITABLE(0);
-    else
-        HANDLE_HDR(handle)->flags &= ~WRITABLE(0);
-
     if (IS_ERR(bytes))
         bytes = unix_to_pal_error(ERRNO(bytes));
 
@@ -860,12 +850,6 @@ static int64_t udp_send(PAL_HANDLE handle, uint64_t offset, size_t len, const vo
     hdr.msg_flags      = 0;
 
     int64_t bytes = INLINE_SYSCALL(sendmsg, 3, handle->sock.fd, &hdr, MSG_NOSIGNAL);
-
-    if (!IS_ERR(bytes) && (size_t)bytes == len)
-        HANDLE_HDR(handle)->flags |= WRITABLE(0);
-    else
-        HANDLE_HDR(handle)->flags &= ~WRITABLE(0);
-
     if (IS_ERR(bytes))
         bytes = unix_to_pal_error(ERRNO(bytes));
 
@@ -912,12 +896,6 @@ static int64_t udp_sendbyaddr(PAL_HANDLE handle, uint64_t offset, size_t len, co
     hdr.msg_flags      = 0;
 
     int64_t bytes = INLINE_SYSCALL(sendmsg, 3, handle->sock.fd, &hdr, MSG_NOSIGNAL);
-
-    if (!IS_ERR(bytes) && (size_t)bytes == len)
-        HANDLE_HDR(handle)->flags |= WRITABLE(0);
-    else
-        HANDLE_HDR(handle)->flags &= ~WRITABLE(0);
-
     if (IS_ERR(bytes))
         bytes = unix_to_pal_error(ERRNO(bytes));
 
@@ -982,7 +960,6 @@ static int socket_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr) {
     attr->handle_type           = HANDLE_HDR(handle)->type;
     attr->nonblocking           = handle->sock.nonblocking;
     attr->disconnected          = HANDLE_HDR(handle)->flags & ERROR(0);
-    attr->writable              = HANDLE_HDR(handle)->flags & WRITABLE(0);
 
     attr->socket.linger         = handle->sock.linger;
     attr->socket.receivebuf     = handle->sock.receivebuf;
@@ -1005,14 +982,14 @@ static int socket_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr) {
     }
 
     /* query if there is data available for reading */
-    struct pollfd pfd  = {.fd = handle->sock.fd, .events = POLLIN, .revents = 0};
+    struct pollfd pfd  = {.fd = handle->sock.fd, .events = POLLIN | POLLOUT, .revents = 0};
     struct timespec tp = {0, 0};
     ret = INLINE_SYSCALL(ppoll, 5, &pfd, 1, &tp, NULL, 0);
     if (IS_ERR(ret))
         return unix_to_pal_error(ERRNO(ret));
 
-    attr->readable = (ret == 1 && pfd.revents == POLLIN);
-
+    attr->readable = ret == 1 && (pfd.revents & (POLLIN | POLLERR | POLLHUP)) == POLLIN;
+    attr->writable = ret == 1 && (pfd.revents & (POLLOUT | POLLERR | POLLHUP)) == POLLOUT;
     return 0;
 }
 
@@ -1285,7 +1262,7 @@ PAL_HANDLE _DkBroadcastStreamOpen(void) {
 
     PAL_HANDLE hdl = malloc(HANDLE_SIZE(mcast));
     SET_HANDLE_TYPE(hdl, mcast);
-    HANDLE_HDR(hdl)->flags |= WFD(1) | WRITABLE(1);
+    HANDLE_HDR(hdl)->flags |= WFD(1);
     hdl->mcast.srv         = srv;
     hdl->mcast.cli         = cli;
     hdl->mcast.port        = (PAL_NUM)pal_sec.mcast_port;
@@ -1327,19 +1304,11 @@ static int64_t mcast_send(PAL_HANDLE handle, uint64_t offset, uint64_t size, con
             case ECONNRESET:
             case EPIPE:
                 return -PAL_ERROR_CONNFAILED;
-            case EAGAIN:
-                HANDLE_HDR(handle)->flags &= ~WRITABLE(1);
-                /* fallthrough */
             default:
                 return unix_to_pal_error(ERRNO(bytes));
         }
 
     assert(!IS_ERR(bytes));
-    if ((size_t)bytes == size)
-        HANDLE_HDR(handle)->flags |= WRITABLE(1);
-    else
-        HANDLE_HDR(handle)->flags &= ~WRITABLE(1);
-
     return bytes;
 }
 
@@ -1384,7 +1353,7 @@ static int mcast_attrquerybyhdl(PAL_HANDLE handle, PAL_STREAM_ATTR* attr) {
     attr->disconnected = HANDLE_HDR(handle)->flags & (ERROR(0) | ERROR(1));
     attr->nonblocking  = handle->mcast.nonblocking;
     attr->readable     = !!val;
-    attr->writable     = HANDLE_HDR(handle)->flags & WRITABLE(1);
+    attr->writable     = PAL_TRUE;
     attr->runnable     = PAL_FALSE;
     attr->pending_size = val;
 

+ 1 - 3
Pal/src/host/Linux/pal_host.h

@@ -168,9 +168,7 @@ typedef struct pal_handle
 
 #define RFD(n)          (1 << (MAX_FDS*0 + (n)))
 #define WFD(n)          (1 << (MAX_FDS*1 + (n)))
-#define WRITABLE(n)     (1 << (MAX_FDS*2 + (n)))
-#define ERROR(n)        (1 << (MAX_FDS*3 + (n)))
-#define HAS_FDS         ((1 << MAX_FDS*2) - 1)
+#define ERROR(n)        (1 << (MAX_FDS*2 + (n)))
 
 #define HANDLE_TYPE(handle)  ((handle)->hdr.type)
 

+ 4 - 0
Pal/src/host/Linux/pal_linux_defs.h

@@ -14,4 +14,8 @@
 
 #define BLOCK_SIGFAULT 0
 
+#ifndef FIONREAD
+#define FIONREAD 0x541B
+#endif
+
 #endif /* PAL_LINUX_DEFS_H */