Index: Makefile
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/Makefile,v
retrieving revision 1.27
diff -u -p -r1.27 Makefile
--- Makefile	13 Sep 2022 10:28:19 -0000	1.27
+++ Makefile	9 Nov 2022 17:20:36 -0000
@@ -5,7 +5,7 @@
 PROG=		vmd
 SRCS=		vmd.c control.c log.c priv.c proc.c config.c vmm.c
 SRCS+=		vm.c loadfile_elf.c pci.c virtio.c i8259.c mc146818.c
-SRCS+=		ns8250.c i8253.c dhcp.c packet.c
+SRCS+=		ns8250.c i8253.c dhcp.c packet.c task.c
 SRCS+=		parse.y atomicio.c vioscsi.c vioraw.c vioqcow2.c fw_cfg.c
 SRCS+=		vm_agentx.c
 
Index: task.c
===================================================================
RCS file: task.c
diff -N task.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ task.c	9 Nov 2022 17:20:36 -0000
@@ -0,0 +1,187 @@
+/*	$OpenBSD: task.c,v 1.2 2018/06/19 17:12:34 reyk Exp $ */
+
+/*
+ * Copyright (c) 2017 David Gwynne
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "task.h"
+
+#define ISSET(_v, _m)	((_v) & (_m))
+#define SET(_v, _m)	((_v) |= (_m))
+#define CLR(_v, _m)	((_v) &= ~(_m))
+
+/*
+ * A task queue: a FIFO list of pending tasks drained by one thread.
+ * mtx is held around every access to list and to the TASK_ONQUEUE flag
+ * of a task being added or removed; cv is signalled when a task is added.
+ */
+struct taskq {
+	pthread_t		thread;
+	struct task_list	list;
+	pthread_mutex_t		mtx;
+	pthread_cond_t		cv;
+};
+
+/* t_flags bit: the task is linked on a taskq list. */
+#define TASK_ONQUEUE	(1 << 0)
+
+static void	*taskq_run(void *);
+
+/*
+ * Allocate a task queue and start its worker thread, named "name".
+ * Returns the new queue, or NULL on failure; when a pthread call fails
+ * errno is set to the error number it returned.
+ */
+struct taskq *
+taskq_create(const char *name)
+{
+	struct taskq *tq;
+	int error;
+
+	tq = malloc(sizeof(*tq));
+	if (tq == NULL)
+		return (NULL);
+
+	TAILQ_INIT(&tq->list);
+
+	error = pthread_mutex_init(&tq->mtx, NULL);
+	if (error != 0)
+		goto free;
+
+	error = pthread_cond_init(&tq->cv, NULL);
+	if (error != 0)
+		goto mtx;
+
+	error = pthread_create(&tq->thread, NULL, taskq_run, tq);
+	if (error != 0)
+		goto cv;
+
+	pthread_set_name_np(tq->thread, name);
+
+	return (tq);
+
+	/* Unwind in the reverse order of setup. */
+cv:
+	pthread_cond_destroy(&tq->cv);
+mtx:
+	pthread_mutex_destroy(&tq->mtx); /* can this really fail? */
+free:
+	free(tq);
+
+	errno = error;
+	return (NULL);
+}
+
+/*
+ * Worker thread body: sleep until the list is non-empty, unlink the
+ * first task, and call its function with the mutex released.  The
+ * function and argument are copied out while the mutex is still held,
+ * so the task structure is not touched again once it is off the list.
+ */
+static void *
+taskq_run(void *tqarg)
+{
+	struct taskq *tq = tqarg;
+	struct task *t;
+
+	void (*t_func)(void *);
+	void *t_arg;
+
+	for (;;) {
+		pthread_mutex_lock(&tq->mtx);
+		while ((t = TAILQ_FIRST(&tq->list)) == NULL)
+			pthread_cond_wait(&tq->cv, &tq->mtx);
+
+		TAILQ_REMOVE(&tq->list, t, t_entry);
+		CLR(t->t_flags, TASK_ONQUEUE);
+
+		t_func = t->t_func;
+		t_arg = t->t_arg;
+
+		pthread_mutex_unlock(&tq->mtx);
+
+		(*t_func)(t_arg);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Initialise a task to call fn(arg) and mark it as not queued.
+ */
+void
+task_set(struct task *t, void (*fn)(void *), void *arg)
+{
+	t->t_func = fn;
+	t->t_arg = arg;
+	t->t_flags = 0;
+}
+
+/*
+ * Queue a task for execution on tq.  Returns 1 if the task was added,
+ * or 0 if it was already on a queue.
+ *
+ * TASK_ONQUEUE is only examined with the mutex held.  An unlocked peek
+ * races with taskq_run() clearing the flag and could report a task as
+ * still queued after the worker has already taken it off the list,
+ * which would silently drop the request.
+ */
+int
+task_add(struct taskq *tq, struct task *t)
+{
+	int rv = 1;
+
+	pthread_mutex_lock(&tq->mtx);
+	if (ISSET(t->t_flags, TASK_ONQUEUE))
+		rv = 0;
+	else {
+		SET(t->t_flags, TASK_ONQUEUE);
+		TAILQ_INSERT_TAIL(&tq->list, t, t_entry);
+		pthread_cond_signal(&tq->cv);
+	}
+	pthread_mutex_unlock(&tq->mtx);
+
+	return (rv);
+}
+
+/*
+ * Remove a task from tq before it has run.  Returns 1 if the task was
+ * unlinked, or 0 if it was not on a queue.  As in task_add(), the flag
+ * is only read under the mutex.
+ */
+int
+task_del(struct taskq *tq, struct task *t)
+{
+	int rv = 1;
+
+	pthread_mutex_lock(&tq->mtx);
+	if (!ISSET(t->t_flags, TASK_ONQUEUE))
+		rv = 0;
+	else {
+		TAILQ_REMOVE(&tq->list, t, t_entry);
+		CLR(t->t_flags, TASK_ONQUEUE);
+	}
+	pthread_mutex_unlock(&tq->mtx);
+
+	return (rv);
+}
Index: task.h
===================================================================
RCS file: task.h
diff -N task.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ task.h	9 Nov 2022 17:20:36 -0000
@@ -0,0 +1,43 @@
+/*	$OpenBSD: task.h,v 1.1 2017/09/15 02:39:33 dlg Exp $ */
+
+/*
+ * Copyright (c) 2013 David Gwynne
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */ + +#ifndef _TASK_H_ +#define _TASK_H_ + +#include + +struct taskq; + +struct task { + TAILQ_ENTRY(task) t_entry; + void (*t_func)(void *); + void *t_arg; + unsigned int t_flags; +}; + +TAILQ_HEAD(task_list, task); + +#define TASK_INITIALIZER(_f, _a) {{ NULL, NULL }, (_f), (_a), 0 } + +struct taskq *taskq_create(const char *); + +void task_set(struct task *, void (*)(void *), void *); +int task_add(struct taskq *, struct task *); +int task_del(struct taskq *, struct task *); + +#endif /* _TASK_H_ */ Index: virtio.c =================================================================== RCS file: /cvs/src/usr.sbin/vmd/virtio.c,v retrieving revision 1.97 diff -u -p -r1.97 virtio.c --- virtio.c 29 Aug 2021 18:01:32 -0000 1.97 +++ virtio.c 9 Nov 2022 17:20:36 -0000 @@ -46,6 +46,7 @@ #include "virtio.h" #include "vmd.h" #include "vmm.h" +#include "task.h" extern char *__progname; struct viornd_dev viornd; @@ -54,6 +55,8 @@ struct vionet_dev *vionet; struct vioscsi_dev *vioscsi; struct vmmci_dev vmmci; +struct taskq *iotq; + int nr_vionet; int nr_vioblk; @@ -294,11 +297,17 @@ virtio_rnd_io(int dir, uint16_t reg, uin void vioblk_update_qa(struct vioblk_dev *dev) { + struct vioblk_queue *vbq; + /* Invalid queue? 
*/ if (dev->cfg.queue_select > 0) return; - dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address; + vbq = &dev->vbq[dev->cfg.queue_select]; + + vbq->vq.qa = dev->cfg.queue_address; + vbq->ring = vaddr_mem(dev->cfg.queue_address * VIRTIO_PAGE_SIZE, + vring_size(VIOBLK_QUEUE_SIZE)); } void @@ -311,8 +320,8 @@ vioblk_update_qs(struct vioblk_dev *dev) } /* Update queue address/size based on queue select */ - dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa; - dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs; + dev->cfg.queue_address = dev->vbq[dev->cfg.queue_select].vq.qa; + dev->cfg.queue_size = dev->vbq[dev->cfg.queue_select].vq.qs; } static void @@ -421,77 +430,59 @@ vioblk_finish_write(struct ioinfo *info) /* * XXX in various cases, ds should be set to VIRTIO_BLK_S_IOERR, if we can */ -int -vioblk_notifyq(struct vioblk_dev *dev) +void +vioblk_notifyq(void *arg) { - uint64_t q_gpa; - uint32_t vr_sz; - uint16_t idx, cmd_desc_idx, secdata_desc_idx, ds_desc_idx; - uint8_t ds; - int cnt, ret; + struct vioblk_queue *vbq = arg; + struct vioblk_dev *dev = vbq->dev; + struct virtio_vq_info *vq = &vbq->vq; + uint16_t cmd_desc_idx, secdata_desc_idx, ds_desc_idx; off_t secbias; char *vr; struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc; struct vring_avail *avail; struct vring_used *used; struct virtio_blk_req_hdr cmd; + unsigned int prod, cons, cnt; + uint8_t ds; - ret = 0; - - /* Invalid queue? 
*/ - if (dev->cfg.queue_notify > 0) - return (0); - - vr_sz = vring_size(VIOBLK_QUEUE_SIZE); - q_gpa = dev->vq[dev->cfg.queue_notify].qa; - q_gpa = q_gpa * VIRTIO_PAGE_SIZE; - - vr = calloc(1, vr_sz); + vr = vbq->ring; if (vr == NULL) { - log_warn("calloc error getting vioblk ring"); - return (0); - } - - if (read_mem(q_gpa, vr, vr_sz)) { - log_warnx("error reading gpa 0x%llx", q_gpa); - goto out; + log_warnx("%s: ring is not initialized", __func__); + return; } /* Compute offsets in ring of descriptors, avail ring, and used ring */ desc = (struct vring_desc *)(vr); - avail = (struct vring_avail *)(vr + - dev->vq[dev->cfg.queue_notify].vq_availoffset); - used = (struct vring_used *)(vr + - dev->vq[dev->cfg.queue_notify].vq_usedoffset); + avail = (struct vring_avail *)(vr + vq->vq_availoffset); + used = (struct vring_used *)(vr + vq->vq_usedoffset); - idx = dev->vq[dev->cfg.queue_notify].last_avail & VIOBLK_QUEUE_MASK; + cons = vq->last_avail & VIOBLK_QUEUE_MASK; + prod = avail->idx & VIOBLK_QUEUE_MASK; - if ((avail->idx & VIOBLK_QUEUE_MASK) == idx) { - log_warnx("vioblk queue notify - nothing to do?"); - goto out; - } - - while (idx != (avail->idx & VIOBLK_QUEUE_MASK)) { + if (cons == prod) + return; - cmd_desc_idx = avail->ring[idx] & VIOBLK_QUEUE_MASK; + do { + cmd_desc_idx = avail->ring[cons] & VIOBLK_QUEUE_MASK; cmd_desc = &desc[cmd_desc_idx]; if ((cmd_desc->flags & VRING_DESC_F_NEXT) == 0) { log_warnx("unchained vioblk cmd descriptor received " "(idx %d)", cmd_desc_idx); - goto out; + break; } /* Read command from descriptor ring */ if (cmd_desc->flags & VRING_DESC_F_WRITE) { log_warnx("vioblk: unexpected writable cmd descriptor " "%d", cmd_desc_idx); - goto out; + return; } if (read_mem(cmd_desc->addr, &cmd, sizeof(cmd))) { log_warnx("vioblk: command read_mem error @ 0x%llx", cmd_desc->addr); - goto out; + break; } switch (cmd.type) { @@ -565,6 +556,7 @@ vioblk_notifyq(struct vioblk_dev *dev) ds_desc = secdata_desc; ds = VIRTIO_BLK_S_OK; + break; case 
VIRTIO_BLK_T_OUT: secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; @@ -691,22 +683,20 @@ vioblk_notifyq(struct vioblk_dev *dev) goto out; } - ret = 1; dev->cfg.isr_status = 1; used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx; used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len; + __sync_synchronize(); used->idx++; - dev->vq[dev->cfg.queue_notify].last_avail = avail->idx & - VIOBLK_QUEUE_MASK; - if (write_mem(q_gpa, vr, vr_sz)) - log_warnx("%s: error writing vio ring", __func__); + cons++; + cons &= VIOBLK_QUEUE_MASK; + } while (cons != prod); - idx = (idx + 1) & VIOBLK_QUEUE_MASK; - } out: - free(vr); - return (ret); + vq->last_avail = cons; + dev->cfg.isr_status = 1; + vcpu_assert_pic_irq(dev->vm_id, 0, dev->irq); } int @@ -739,8 +729,7 @@ virtio_blk_io(int dir, uint16_t reg, uin break; case VIRTIO_CONFIG_QUEUE_NOTIFY: dev->cfg.queue_notify = *data; - if (vioblk_notifyq(dev)) - *intr = 1; + task_add(iotq, &dev->vbq[0].t); break; case VIRTIO_CONFIG_DEVICE_STATUS: dev->cfg.device_status = *data; @@ -754,7 +743,7 @@ virtio_blk_io(int dir, uint16_t reg, uin dev->cfg.queue_select = 0; dev->cfg.queue_notify = 0; dev->cfg.isr_status = 0; - dev->vq[0].last_avail = 0; + dev->vbq[0].vq.last_avail = 0; vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq); } break; @@ -1897,8 +1886,14 @@ virtio_init(struct vmd_vm *vm, int child return; } + iotq = taskq_create("vioblk"); + if (iotq == NULL) + fatalx("unable to create vioblk taskq"); + /* One virtio block device for each disk defined in vcp */ for (i = 0; i < vcp->vcp_ndisks; i++) { + struct virtio_vq_info *vq; + if (pci_add_device(&id, PCI_VENDOR_QUMRANET, PCI_PRODUCT_QUMRANET_VIO_BLOCK, PCI_CLASS_MASS_STORAGE, @@ -1915,13 +1910,16 @@ virtio_init(struct vmd_vm *vm, int child "device", __progname); return; } - vioblk[i].vq[0].qs = VIOBLK_QUEUE_SIZE; - vioblk[i].vq[0].vq_availoffset = + + vq = &vioblk[i].vbq[0].vq; + vq->qs = VIOBLK_QUEUE_SIZE; + vq->vq_availoffset = sizeof(struct vring_desc) * 
VIOBLK_QUEUE_SIZE; - vioblk[i].vq[0].vq_usedoffset = VIRTQUEUE_ALIGN( + vq->vq_usedoffset = VIRTQUEUE_ALIGN( sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE)); - vioblk[i].vq[0].last_avail = 0; + vq->last_avail = 0; + vioblk[i].cfg.device_feature = VIRTIO_BLK_F_SIZE_MAX; vioblk[i].max_xfer = 1048576; vioblk[i].pci_id = id; @@ -1935,6 +1933,10 @@ virtio_init(struct vmd_vm *vm, int child return; } vioblk[i].sz /= 512; + + vioblk[i].vbq[0].dev = &vioblk[i]; + task_set(&vioblk[i].vbq[0].t, + vioblk_notifyq, &vioblk[i].vbq[0]); } } Index: virtio.h =================================================================== RCS file: /cvs/src/usr.sbin/vmd/virtio.h,v retrieving revision 1.42 diff -u -p -r1.42 virtio.h --- virtio.h 4 May 2022 23:17:25 -0000 1.42 +++ virtio.h 9 Nov 2022 17:20:36 -0000 @@ -24,6 +24,7 @@ #include #include "vmd.h" +#include "task.h" #ifndef _VIRTIO_H_ #define _VIRTIO_H_ @@ -167,10 +168,22 @@ struct viornd_dev { uint32_t vm_id; }; +/* + * vioblk + */ + +struct vioblk_queue { + struct vioblk_dev *dev; + void *ring; + struct virtio_vq_info vq; + struct task t; + struct event ev; +}; + struct vioblk_dev { struct virtio_io_cfg cfg; - struct virtio_vq_info vq[VIRTIO_MAX_QUEUES]; + struct vioblk_queue vbq[VIRTIO_MAX_QUEUES]; struct virtio_backing file; uint64_t sz; @@ -181,7 +194,8 @@ struct vioblk_dev { uint32_t vm_id; }; -/* vioscsi will use at least 3 queues - 5.6.2 Virtqueues +/* + * vioscsi will use at least 3 queues - 5.6.2 Virtqueues * Current implementation will use 3 * 0 - control * 1 - event @@ -301,7 +315,7 @@ int vioblk_restore(int, struct vmop_crea int[][VM_MAX_BASE_PER_DISK]); void vioblk_update_qs(struct vioblk_dev *); void vioblk_update_qa(struct vioblk_dev *); -int vioblk_notifyq(struct vioblk_dev *); +void vioblk_notifyq(void *); int virtio_net_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t); int vionet_dump(int); Index: vm.c 
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/vm.c,v
retrieving revision 1.73
diff -u -p -r1.73 vm.c
--- vm.c	1 Sep 2022 22:01:40 -0000	1.73
+++ vm.c	9 Nov 2022 17:20:36 -0000
@@ -1924,6 +1924,45 @@ read_mem(paddr_t src, void *buf, size_t 
 	return (0);
 }
 
+/*
+ * vaddr_mem
+ *
+ * Returns the host virtual address backing a guest physical range.
+ *
+ * Parameters:
+ *  src: guest physical address at which the range starts
+ *  len: length of the range in bytes
+ *
+ * Return values:
+ *  pointer to the host mapping of 'src', or NULL with errno set to
+ *  EINVAL if find_gpa_range() yields no range, or to ENOMEM if 'len'
+ *  bytes do not fit in what remains of the range found.
+ */
+void *
+vaddr_mem(paddr_t src, size_t len)
+{
+	struct vm_mem_range *vmr;
+	size_t off;
+
+	vmr = find_gpa_range(&current_vm->vm_params.vmc_params, src, len);
+	if (vmr == NULL) {
+		errno = EINVAL;
+		log_warn("%s: failed - invalid memory range src = 0x%lx, "
+		    "len = 0x%zx", __func__, src, len);
+		return (NULL);
+	}
+
+	off = src - vmr->vmr_gpa;
+	if (len > (vmr->vmr_size - off)) {
+		errno = ENOMEM;
+		log_warn("%s: failed - invalid memory range src = 0x%lx, "
+		    "len = 0x%zx", __func__, src, len);
+		return (NULL);
+	}
+
+	return ((char *)vmr->vmr_va + off);
+}
+
 /*
  * vcpu_assert_pic_irq
  *
Index: vmd.h
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/vmd.h,v
retrieving revision 1.111
diff -u -p -r1.111 vmd.h
--- vmd.h	31 Oct 2022 14:02:11 -0000	1.111
+++ vmd.h	9 Nov 2022 17:20:36 -0000
@@ -454,6 +454,7 @@ int	 vmm_pipe(struct vmd_vm *, int, void
 void	 mutex_lock(pthread_mutex_t *);
 void	 mutex_unlock(pthread_mutex_t *);
 int	 read_mem(paddr_t, void *buf, size_t);
+void	*vaddr_mem(paddr_t, size_t);
 int	 start_vm(struct vmd_vm *, int);
 __dead void vm_shutdown(unsigned int);
 void	 vm_pipe_init(struct vm_dev_pipe *, void (*)(int, short, void *));