From bb755ba47f3747251c0eadf681ee68b9033309b8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 6 Oct 2020 15:38:55 +0300 Subject: qdev: add "check if address free" callback for buses Check if an address is free on the bus before plugging in the device. This makes it possible to do the check without any side effects, and to detect the problem early without having to do it in the realize callback. Signed-off-by: Paolo Bonzini Message-Id: <20201006123904.610658-5-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- hw/core/qdev.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'hw/core/qdev.c') diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 96772a15bd..74db78df36 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -94,13 +94,23 @@ static void bus_add_child(BusState *bus, DeviceState *child) 0); } -void qdev_set_parent_bus(DeviceState *dev, BusState *bus) +static bool bus_check_address(BusState *bus, DeviceState *child, Error **errp) +{ + BusClass *bc = BUS_GET_CLASS(bus); + return !bc->check_address || bc->check_address(bus, child, errp); +} + +bool qdev_set_parent_bus(DeviceState *dev, BusState *bus, Error **errp) { BusState *old_parent_bus = dev->parent_bus; DeviceClass *dc = DEVICE_GET_CLASS(dev); assert(dc->bus_type && object_dynamic_cast(OBJECT(bus), dc->bus_type)); + if (!bus_check_address(bus, dev, errp)) { + return false; + } + if (old_parent_bus) { trace_qdev_update_parent_bus(dev, object_get_typename(OBJECT(dev)), old_parent_bus, object_get_typename(OBJECT(old_parent_bus)), @@ -126,6 +136,7 @@ void qdev_set_parent_bus(DeviceState *dev, BusState *bus) object_unref(OBJECT(old_parent_bus)); object_unref(OBJECT(dev)); } + return true; } DeviceState *qdev_new(const char *name) @@ -371,7 +382,9 @@ bool qdev_realize(DeviceState *dev, BusState *bus, Error **errp) assert(!dev->realized && !dev->parent_bus); if (bus) { - qdev_set_parent_bus(dev, bus); + if (!qdev_set_parent_bus(dev, bus, errp)) { + return false; + } } 
else { assert(!DEVICE_GET_CLASS(dev)->bus_type); } -- cgit 1.4.1 From 2d24a64661549732fc77f632928318dd52f5bce5 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 6 Oct 2020 15:38:59 +0300 Subject: device-core: use RCU for list of children of a bus This fixes the race between device emulation code that tries to find a child device to dispatch the request to (e.g. a SCSI disk), and hotplug of a new device to that bus. Note that this doesn't convert all the readers of the list but only those that might go over that list without the BQL held. This is a very small first step to make this code thread-safe. Suggested-by: Paolo Bonzini Signed-off-by: Maxim Levitsky Reviewed-by: Stefan Hajnoczi Message-Id: <20200913160259.32145-5-mlevitsk@redhat.com> [Use RCU_READ_LOCK_GUARD in more places, adjust testcase now that the delay in DEVICE_DELETED due to RCU is more consistent. - Paolo] Signed-off-by: Paolo Bonzini Message-Id: <20201006123904.610658-9-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- hw/core/bus.c | 28 +++++++++++++++++----------- hw/core/qdev.c | 37 +++++++++++++++++++++++-------------- hw/scsi/scsi-bus.c | 12 +++++++++--- hw/scsi/virtio-scsi.c | 6 +++++- include/hw/qdev-core.h | 9 +++++++++ 5 files changed, 63 insertions(+), 29 deletions(-) (limited to 'hw/core/qdev.c') diff --git a/hw/core/bus.c b/hw/core/bus.c index 6b987b6946..a0483859ae 100644 --- a/hw/core/bus.c +++ b/hw/core/bus.c @@ -49,12 +49,14 @@ int qbus_walk_children(BusState *bus, } } - QTAILQ_FOREACH(kid, &bus->children, sibling) { - err = qdev_walk_children(kid->child, - pre_devfn, pre_busfn, - post_devfn, post_busfn, opaque); - if (err < 0) { - return err; + WITH_RCU_READ_LOCK_GUARD() { + QTAILQ_FOREACH_RCU(kid, &bus->children, sibling) { + err = qdev_walk_children(kid->child, + pre_devfn, pre_busfn, + post_devfn, post_busfn, opaque); + if (err < 0) { + return err; + } } } @@ -90,8 +92,10 @@ static void bus_reset_child_foreach(Object *obj, ResettableChildCallback cb, BusState *bus =
BUS(obj); BusChild *kid; - QTAILQ_FOREACH(kid, &bus->children, sibling) { - cb(OBJECT(kid->child), opaque, type); + WITH_RCU_READ_LOCK_GUARD() { + QTAILQ_FOREACH_RCU(kid, &bus->children, sibling) { + cb(OBJECT(kid->child), opaque, type); + } } } @@ -194,9 +198,11 @@ static void bus_set_realized(Object *obj, bool value, Error **errp) /* TODO: recursive realization */ } else if (!value && bus->realized) { - QTAILQ_FOREACH(kid, &bus->children, sibling) { - DeviceState *dev = kid->child; - qdev_unrealize(dev); + WITH_RCU_READ_LOCK_GUARD() { + QTAILQ_FOREACH_RCU(kid, &bus->children, sibling) { + DeviceState *dev = kid->child; + qdev_unrealize(dev); + } } if (bc->unrealize) { bc->unrealize(bus); diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 74db78df36..59e5e710b7 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -51,6 +51,12 @@ const VMStateDescription *qdev_get_vmsd(DeviceState *dev) return dc->vmsd; } +static void bus_free_bus_child(BusChild *kid) +{ + object_unref(OBJECT(kid->child)); + g_free(kid); +} + static void bus_remove_child(BusState *bus, DeviceState *child) { BusChild *kid; @@ -60,15 +66,16 @@ static void bus_remove_child(BusState *bus, DeviceState *child) char name[32]; snprintf(name, sizeof(name), "child[%d]", kid->index); - QTAILQ_REMOVE(&bus->children, kid, sibling); + QTAILQ_REMOVE_RCU(&bus->children, kid, sibling); bus->num_children--; /* This gives back ownership of kid->child back to us. */ object_property_del(OBJECT(bus), name); - object_unref(OBJECT(kid->child)); - g_free(kid); - return; + + /* free the bus kid, when it is safe to do so*/ + call_rcu(kid, bus_free_bus_child, rcu); + break; } } } @@ -83,7 +90,7 @@ static void bus_add_child(BusState *bus, DeviceState *child) kid->child = child; object_ref(OBJECT(kid->child)); - QTAILQ_INSERT_HEAD(&bus->children, kid, sibling); + QTAILQ_INSERT_HEAD_RCU(&bus->children, kid, sibling); /* This transfers ownership of kid->child to the property. 
*/ snprintf(name, sizeof(name), "child[%d]", kid->index); @@ -672,17 +679,19 @@ DeviceState *qdev_find_recursive(BusState *bus, const char *id) DeviceState *ret; BusState *child; - QTAILQ_FOREACH(kid, &bus->children, sibling) { - DeviceState *dev = kid->child; + WITH_RCU_READ_LOCK_GUARD() { + QTAILQ_FOREACH_RCU(kid, &bus->children, sibling) { + DeviceState *dev = kid->child; - if (dev->id && strcmp(dev->id, id) == 0) { - return dev; - } + if (dev->id && strcmp(dev->id, id) == 0) { + return dev; + } - QLIST_FOREACH(child, &dev->child_bus, sibling) { - ret = qdev_find_recursive(child, id); - if (ret) { - return ret; + QLIST_FOREACH(child, &dev->child_bus, sibling) { + ret = qdev_find_recursive(child, id); + if (ret) { + return ret; + } } } } diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index 6b1ed7ae9a..4cf1f404b4 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -400,7 +400,10 @@ static bool scsi_target_emulate_report_luns(SCSITargetReq *r) id = r->req.dev->id; found_lun0 = false; n = 0; - QTAILQ_FOREACH(kid, &r->req.bus->qbus.children, sibling) { + + RCU_READ_LOCK_GUARD(); + + QTAILQ_FOREACH_RCU(kid, &r->req.bus->qbus.children, sibling) { DeviceState *qdev = kid->child; SCSIDevice *dev = SCSI_DEVICE(qdev); @@ -421,7 +424,7 @@ static bool scsi_target_emulate_report_luns(SCSITargetReq *r) memset(r->buf, 0, len); stl_be_p(&r->buf[0], n); i = found_lun0 ? 
8 : 16; - QTAILQ_FOREACH(kid, &r->req.bus->qbus.children, sibling) { + QTAILQ_FOREACH_RCU(kid, &r->req.bus->qbus.children, sibling) { DeviceState *qdev = kid->child; SCSIDevice *dev = SCSI_DEVICE(qdev); @@ -430,6 +433,7 @@ static bool scsi_target_emulate_report_luns(SCSITargetReq *r) i += 8; } } + assert(i == n + 8); r->len = len; return true; @@ -1572,7 +1576,8 @@ SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int id, int lun) BusChild *kid; SCSIDevice *target_dev = NULL; - QTAILQ_FOREACH(kid, &bus->qbus.children, sibling) { + RCU_READ_LOCK_GUARD(); + QTAILQ_FOREACH_RCU(kid, &bus->qbus.children, sibling) { DeviceState *qdev = kid->child; SCSIDevice *dev = SCSI_DEVICE(qdev); @@ -1591,6 +1596,7 @@ SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int id, int lun) } } } + return target_dev; } diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 3a71ea7097..971afbb217 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -367,12 +367,16 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: target = req->req.tmf.lun[1]; s->resetting++; - QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { + + rcu_read_lock(); + QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) { d = SCSI_DEVICE(kid->child); if (d->channel == 0 && d->id == target) { qdev_reset_all(&d->qdev); } } + rcu_read_unlock(); + s->resetting--; break; diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 14d476c587..2c6307e3ed 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -3,6 +3,8 @@ #include "qemu/queue.h" #include "qemu/bitmap.h" +#include "qemu/rcu.h" +#include "qemu/rcu_queue.h" #include "qom/object.h" #include "hw/hotplug.h" #include "hw/resettable.h" @@ -238,6 +240,7 @@ struct BusClass { }; typedef struct BusChild { + struct rcu_head rcu; DeviceState *child; int index; QTAILQ_ENTRY(BusChild) sibling; @@ -258,6 +261,12 @@ struct BusState { int max_index; bool realized; int 
num_children; + + /* + * children is an RCU QTAILQ, thus readers must use RCU to access it, + * and writers must hold the big QEMU lock + */ + QTAILQ_HEAD(, BusChild) children; QLIST_ENTRY(BusState) sibling; ResettableState reset; -- cgit 1.4.1 From a23151e8cc8cc08546252dc9c7671171d9c44615 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 6 Oct 2020 15:39:00 +0300 Subject: device-core: use atomic_set on .realized property Some code might race with placement of new devices on a bus. We currently first place an (unrealized) device on the bus and then realize it. As a workaround, users that scan the child device list can check the realized property to see if it is safe to access such a device. Use an atomic write here too to aid with this. A separate discussion is what to do with devices that are unrealized: It looks like for this case we only call the hotplug handler's unplug callback and it's up to it to unrealize the device. An atomic operation doesn't cause harm for this code path though.
Signed-off-by: Maxim Levitsky Reviewed-by: Stefan Hajnoczi Message-Id: <20200913160259.32145-6-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini Message-Id: <20201006123904.610658-10-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- hw/core/qdev.c | 19 ++++++++++++++++++- include/hw/qdev-core.h | 2 ++ 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'hw/core/qdev.c') diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 59e5e710b7..fc4daa36fa 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -946,7 +946,25 @@ static void device_set_realized(Object *obj, bool value, Error **errp) } } + qatomic_store_release(&dev->realized, value); + } else if (!value && dev->realized) { + + /* + * Change the value so that any concurrent users are aware + * that the device is going to be unrealized + * + * TODO: change .realized property to enum that states + * each phase of the device realization/unrealization + */ + + qatomic_set(&dev->realized, value); + /* + * Ensure that concurrent users see this update prior to + * any other changes done by unrealize. + */ + smp_wmb(); + QLIST_FOREACH(bus, &dev->child_bus, sibling) { qbus_unrealize(bus); } @@ -961,7 +979,6 @@ static void device_set_realized(Object *obj, bool value, Error **errp) } assert(local_err == NULL); - dev->realized = value; return; child_realize_fail: diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 2c6307e3ed..868973319e 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -163,6 +163,8 @@ struct NamedClockList { /** * DeviceState: * @realized: Indicates whether the device has been fully constructed. + * When accessed outside the big QEMU lock, must be accessed with + * qatomic_load_acquire() * @reset: ResettableState for the device; handled by Resettable interface. * * This structure should not be accessed directly. We declare it here -- cgit 1.4.1