summary refs log tree commit diff stats
path: root/include
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2020-03-11 14:41:27 +0000
committerPeter Maydell <peter.maydell@linaro.org>2020-03-11 14:41:27 +0000
commit6e8a73e911f066527e775e04b98f31ebd19db600 (patch)
treee7e14f8a09d10c275e4d8164b8b3091fedcdbc46 /include
parentba29883206d92a29ad5a466e679ccfc2ee6132ef (diff)
parentd37d0e365afb6825a90d8356fc6adcc1f58f40f3 (diff)
downloadfocaccia-qemu-6e8a73e911f066527e775e04b98f31ebd19db600.tar.gz
focaccia-qemu-6e8a73e911f066527e775e04b98f31ebd19db600.zip
Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
Pull request

# gpg: Signature made Wed 11 Mar 2020 12:40:36 GMT
# gpg:                using RSA key 8695A8BFD3F97CDAAC35775A9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" [full]
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>" [full]
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  aio-posix: remove idle poll handlers to improve scalability
  aio-posix: support userspace polling of fd monitoring
  aio-posix: add io_uring fd monitoring implementation
  aio-posix: simplify FDMonOps->update() prototype
  aio-posix: extract ppoll(2) and epoll(7) fd monitoring
  aio-posix: move RCU_READ_LOCK() into run_poll_handlers()
  aio-posix: completely stop polling when disabled
  aio-posix: remove confusing QLIST_SAFE_REMOVE()
  qemu/queue.h: clear linked list pointers on remove

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'include')
-rw-r--r--include/block/aio.h71
-rw-r--r--include/qemu/queue.h19
2 files changed, 84 insertions, 6 deletions
diff --git a/include/block/aio.h b/include/block/aio.h
index 9dd61cee7e..cb1989105a 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -14,6 +14,9 @@
 #ifndef QEMU_AIO_H
 #define QEMU_AIO_H
 
+#ifdef CONFIG_LINUX_IO_URING
+#include <liburing.h>
+#endif
 #include "qemu/queue.h"
 #include "qemu/event_notifier.h"
 #include "qemu/thread.h"
@@ -52,6 +55,56 @@ struct ThreadPool;
 struct LinuxAioState;
 struct LuringState;
 
+/* Is polling disabled? */
+bool aio_poll_disabled(AioContext *ctx);
+
+/* Callbacks for file descriptor monitoring implementations */
+typedef struct {
+    /*
+     * update:
+     * @ctx: the AioContext
+     * @old_node: the existing handler or NULL if this file descriptor is being
+     *            monitored for the first time
+     * @new_node: the new handler or NULL if this file descriptor is being
+     *            removed
+     *
+     * Add/remove/modify a monitored file descriptor.
+     *
+     * Called with ctx->list_lock acquired.
+     */
+    void (*update)(AioContext *ctx, AioHandler *old_node, AioHandler *new_node);
+
+    /*
+     * wait:
+     * @ctx: the AioContext
+     * @ready_list: list for handlers that become ready
+     * @timeout: maximum duration to wait, in nanoseconds
+     *
+     * Wait for file descriptors to become ready and place them on ready_list.
+     *
+     * Called with ctx->list_lock incremented but not locked.
+     *
+     * Returns: number of ready file descriptors.
+     */
+    int (*wait)(AioContext *ctx, AioHandlerList *ready_list, int64_t timeout);
+
+    /*
+     * need_wait:
+     * @ctx: the AioContext
+     *
+     * Tell aio_poll() when to stop userspace polling early because ->wait()
+     * has fds ready.
+     *
+     * File descriptor monitoring implementations that cannot poll fd readiness
+     * from userspace should use aio_poll_disabled() here.  This ensures that
+     * file descriptors are not starved by handlers that frequently make
+     * progress via userspace polling.
+     *
+     * Returns: true if ->wait() should be called, false otherwise.
+     */
+    bool (*need_wait)(AioContext *ctx);
+} FDMonOps;
+
 /*
  * Each aio_bh_poll() call carves off a slice of the BH list, so that newly
  * scheduled BHs are not processed until the next aio_bh_poll() call.  All
@@ -65,6 +118,8 @@ struct BHListSlice {
     QSIMPLEQ_ENTRY(BHListSlice) next;
 };
 
+typedef QSLIST_HEAD(, AioHandler) AioHandlerSList;
+
 struct AioContext {
     GSource source;
 
@@ -150,6 +205,10 @@ struct AioContext {
      * locking.
      */
     struct LuringState *linux_io_uring;
+
+    /* State for file descriptor monitoring using Linux io_uring */
+    struct io_uring fdmon_io_uring;
+    AioHandlerSList submit_list;
 #endif
 
     /* TimerLists for calling timers - one per clock type.  Has its own
@@ -168,13 +227,21 @@ struct AioContext {
     int64_t poll_grow;      /* polling time growth factor */
     int64_t poll_shrink;    /* polling time shrink factor */
 
+    /*
+     * List of handlers participating in userspace polling.  Protected by
+     * ctx->list_lock.  Iterated and modified mostly by the event loop thread
+     * from aio_poll() with ctx->list_lock incremented.  aio_set_fd_handler()
+     * only touches the list to delete nodes if ctx->list_lock's count is zero.
+     */
+    AioHandlerList poll_aio_handlers;
+
     /* Are we in polling mode or monitoring file descriptors? */
     bool poll_started;
 
     /* epoll(7) state used when built with CONFIG_EPOLL */
     int epollfd;
-    bool epoll_enabled;
-    bool epoll_available;
+
+    const FDMonOps *fdmon_ops;
 };
 
 /**
diff --git a/include/qemu/queue.h b/include/qemu/queue.h
index 294db54eb1..456a5b01ee 100644
--- a/include/qemu/queue.h
+++ b/include/qemu/queue.h
@@ -142,6 +142,8 @@ struct {                                                                \
                 (elm)->field.le_next->field.le_prev =                   \
                     (elm)->field.le_prev;                               \
         *(elm)->field.le_prev = (elm)->field.le_next;                   \
+        (elm)->field.le_next = NULL;                                    \
+        (elm)->field.le_prev = NULL;                                    \
 } while (/*CONSTCOND*/0)
 
 /*
@@ -225,12 +227,15 @@ struct {                                                                \
 } while (/*CONSTCOND*/0)
 
 #define QSLIST_REMOVE_HEAD(head, field) do {                             \
-        (head)->slh_first = (head)->slh_first->field.sle_next;          \
+        typeof((head)->slh_first) elm = (head)->slh_first;               \
+        (head)->slh_first = elm->field.sle_next;                         \
+        elm->field.sle_next = NULL;                                      \
 } while (/*CONSTCOND*/0)
 
 #define QSLIST_REMOVE_AFTER(slistelm, field) do {                       \
-        (slistelm)->field.sle_next =                                    \
-            QSLIST_NEXT(QSLIST_NEXT((slistelm), field), field);         \
+        typeof(slistelm) next = (slistelm)->field.sle_next;             \
+        (slistelm)->field.sle_next = next->field.sle_next;              \
+        next->field.sle_next = NULL;                                    \
 } while (/*CONSTCOND*/0)
 
 #define QSLIST_REMOVE(head, elm, type, field) do {                      \
@@ -241,6 +246,7 @@ struct {                                                                \
         while (curelm->field.sle_next != (elm))                         \
             curelm = curelm->field.sle_next;                            \
         curelm->field.sle_next = curelm->field.sle_next->field.sle_next; \
+        (elm)->field.sle_next = NULL;                                   \
     }                                                                   \
 } while (/*CONSTCOND*/0)
 
@@ -304,8 +310,10 @@ struct {                                                                \
 } while (/*CONSTCOND*/0)
 
 #define QSIMPLEQ_REMOVE_HEAD(head, field) do {                          \
-    if (((head)->sqh_first = (head)->sqh_first->field.sqe_next) == NULL)\
+    typeof((head)->sqh_first) elm = (head)->sqh_first;                  \
+    if (((head)->sqh_first = elm->field.sqe_next) == NULL)              \
         (head)->sqh_last = &(head)->sqh_first;                          \
+    elm->field.sqe_next = NULL;                                         \
 } while (/*CONSTCOND*/0)
 
 #define QSIMPLEQ_SPLIT_AFTER(head, elm, field, removed) do {            \
@@ -329,6 +337,7 @@ struct {                                                                \
         if ((curelm->field.sqe_next =                                   \
             curelm->field.sqe_next->field.sqe_next) == NULL)            \
                 (head)->sqh_last = &(curelm)->field.sqe_next;           \
+        (elm)->field.sqe_next = NULL;                                   \
     }                                                                   \
 } while (/*CONSTCOND*/0)
 
@@ -446,6 +455,8 @@ union {                                                                 \
             (head)->tqh_circ.tql_prev = (elm)->field.tqe_circ.tql_prev; \
         (elm)->field.tqe_circ.tql_prev->tql_next = (elm)->field.tqe_next; \
         (elm)->field.tqe_circ.tql_prev = NULL;                          \
+        (elm)->field.tqe_circ.tql_next = NULL;                          \
+        (elm)->field.tqe_next = NULL;                                   \
 } while (/*CONSTCOND*/0)
 
 /* remove @left, @right and all elements in between from @head */