? vmd.debug ? vmd.full Index: Makefile =================================================================== RCS file: /cvs/src/usr.sbin/vmd/Makefile,v retrieving revision 1.16 diff -u -p -r1.16 Makefile --- Makefile 3 Jul 2017 22:21:47 -0000 1.16 +++ Makefile 18 Sep 2017 00:39:00 -0000 @@ -7,6 +7,7 @@ SRCS= vmd.c control.c log.c priv.c proc SRCS+= vm.c loadfile_elf.c pci.c virtio.c i8259.c mc146818.c SRCS+= ns8250.c i8253.c vmboot.c ufs.c disklabel.c dhcp.c packet.c SRCS+= parse.y atomicio.c +SRCS+= task.c CFLAGS+= -Wall -I${.CURDIR} CFLAGS+= -Wstrict-prototypes -Wmissing-prototypes Index: virtio.c =================================================================== RCS file: /cvs/src/usr.sbin/vmd/virtio.c,v retrieving revision 1.54 diff -u -p -r1.54 virtio.c --- virtio.c 17 Sep 2017 23:07:56 -0000 1.54 +++ virtio.c 18 Sep 2017 00:39:00 -0000 @@ -18,6 +18,7 @@ #include /* PAGE_SIZE */ #include +#include /* membars */ #include #include @@ -43,14 +44,48 @@ #include "virtio.h" #include "loadfile.h" #include "atomicio.h" +#include "task.h" + +#ifndef MIN +#define MIN(_a, _b) ((_a) < (_b) ? (_a) : (_b)) +#endif + +#ifndef nitems +#define nitems(_a) (sizeof(_a) / sizeof((_a)[0])) +#endif extern char *__progname; +struct vioblk_queue { + struct vioblk_dev *dev; + void *ring; + struct virtio_vq_info vq; + struct task t; + struct event ev; +}; + +struct vioblk_dev { + struct virtio_io_cfg cfg; + + struct vioblk_queue q[VIRTIO_MAX_QUEUES]; + + int fd; + uint64_t sz; + uint32_t max_xfer; + + uint32_t vm_id; + int irq; + + uint8_t pci_id; +}; + struct viornd_dev viornd; struct vioblk_dev *vioblk; struct vionet_dev *vionet; struct vmmci_dev vmmci; +struct taskq *iotq; + int nr_vionet; int nr_vioblk; @@ -62,13 +97,12 @@ int nr_vioblk; #define VMMCI_F_ACK (1<<1) #define VMMCI_F_SYNCRTC (1<<2) -struct ioinfo { - uint8_t *buf; - ssize_t len; - off_t offset; - int fd; - int error; -}; +int virtio_blk_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t); +int vioblk_dump(int); +int vioblk_restore(int, struct vm_create_params *, int *); +void vioblk_update_qs(struct vioblk_dev *); +void vioblk_update_qa(struct vioblk_dev *); +int vioblk_notifyq(struct vioblk_dev *); const char * vioblk_cmd_name(uint32_t type) @@ -85,6 +119,7 @@ vioblk_cmd_name(uint32_t type) } } +#if 0 static void dump_descriptor_chain(struct vring_desc *desc, int16_t dxx) { @@ -108,6 +143,7 @@ dump_descriptor_chain(struct vring_desc desc[dxx].flags, desc[dxx].next); } +#endif static const char * virtio_reg_name(uint8_t reg) @@ -323,7 +359,10 @@ vioblk_update_qa(struct vioblk_dev *dev) if (dev->cfg.queue_select > 0) return; - dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address; + dev->q[dev->cfg.queue_select].vq.qa = dev->cfg.queue_address; + dev->q[dev->cfg.queue_select].ring = vaddr_mem( + dev->cfg.queue_address * VIRTIO_PAGE_SIZE, + vring_size(VIOBLK_QUEUE_SIZE)); } void @@ -336,375 +375,184 @@ vioblk_update_qs(struct vioblk_dev *dev) } /* Update queue address/size based on queue select */ - dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa; - dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs; + dev->cfg.queue_address = dev->q[dev->cfg.queue_select].vq.qa; + dev->cfg.queue_size = dev->q[dev->cfg.queue_select].vq.qs; } -static void -vioblk_free_info(struct ioinfo *info) +static int +vioblk_complete(struct vring_desc *desc, uint8_t ds) { - if (!info) - return; - free(info->buf); - free(info); + if (write_mem(desc->addr, &ds, MIN(desc->len, sizeof(ds)))) { + log_warnx("can't write device status data @ " + "0x%llx", desc->addr); + } + + return (0); } -static struct ioinfo * -vioblk_start_read(struct vioblk_dev *dev, off_t sector, ssize_t sz) +static int +vioblk_io_skip(struct vring_desc *vring, struct vring_desc *desc) { - struct ioinfo *info; + unsigned int idx; - info = calloc(1, sizeof(*info)); - if (!info) - goto nomem; - info->buf = malloc(sz); - if (info->buf == NULL) - goto nomem; - info->len = sz; - info->offset = sector * VIRTIO_BLK_SECTOR_SIZE; - info->fd = dev->fd; - - return info; + do { + idx = desc->next & VIOBLK_QUEUE_MASK; + desc = &vring[idx]; + } while (ISSET(desc->flags, VRING_DESC_F_NEXT)); -nomem: - free(info); - log_warn("malloc errror vioblk read"); - return (NULL); + return (vioblk_complete(desc, VIRTIO_BLK_S_IOERR)); } - -static const uint8_t * -vioblk_finish_read(struct ioinfo *info) -{ - if (pread(info->fd, info->buf, info->len, info->offset) != info->len) { - info->error = errno; - log_warn("vioblk read error"); - return NULL; +static int +vioblk_io(struct vioblk_dev *dev, + ssize_t (*piov)(int, const struct iovec *, int, off_t), + const struct virtio_blk_req_hdr *hdr, + struct vring_desc *desc, + struct vring_desc *vring) +{ + struct iovec iov[128]; + int cnt, iovcnt = 0; + unsigned int idx; + ssize_t rv; + + idx = desc->next & VIOBLK_QUEUE_MASK; + desc = &vring[idx]; + + if (!ISSET(desc->flags, VRING_DESC_F_NEXT)) { + log_warnx("unchained vioblk data descriptor " + "received (idx %u)", idx); + return (-1); } - return info->buf; -} - -static struct ioinfo * -vioblk_start_write(struct vioblk_dev *dev, off_t sector, paddr_t addr, size_t len) -{ - struct ioinfo *info; - - info = calloc(1, sizeof(*info)); - if (!info) - goto nomem; - info->buf = malloc(len); - if (info->buf == NULL) - goto nomem; - info->len = len; - info->offset = sector * VIRTIO_BLK_SECTOR_SIZE; - info->fd = dev->fd; - - if (read_mem(addr, info->buf, len)) { - vioblk_free_info(info); - return NULL; - } + do { + cnt = iovec_mem(desc->addr, desc->len, + iov + iovcnt, nitems(iov) - iovcnt); + if (cnt == -1) { + log_warnx("invalid dma address 0x%016llx", + desc->addr); + return vioblk_io_skip(vring, desc); + } + + iovcnt += cnt; + if (iovcnt == nitems(iov)) { + log_warnx("%s: iov is too small", __func__); + return vioblk_io_skip(vring, desc); + } + + idx = desc->next & VIOBLK_QUEUE_MASK; + desc = &vring[idx]; + } while (ISSET(desc->flags, VRING_DESC_F_NEXT)); - return info; + do { + rv = (*piov)(dev->fd, iov, iovcnt, + hdr->sector * VIRTIO_BLK_SECTOR_SIZE); + } while (rv == -1 && errno == EINTR); -nomem: - free(info); - log_warn("malloc errror vioblk write"); - return (NULL); -} + if (rv == -1) + log_warn("boo hiss @ %llu", hdr->sector); -static int -vioblk_finish_write(struct ioinfo *info) -{ - if (pwrite(info->fd, info->buf, info->len, info->offset) != info->len) { - log_warn("vioblk write error"); - return EIO; - } - return 0; + return vioblk_complete(desc, + rv == -1 ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK); } /* * XXX in various cases, ds should be set to VIRTIO_BLK_S_IOERR, if we can * XXX cant trust ring data from VM, be extra cautious. */ -int -vioblk_notifyq(struct vioblk_dev *dev) +static void +vioblk_q(void *arg) { - uint64_t q_gpa; - uint32_t vr_sz; - uint16_t idx, cmd_desc_idx, secdata_desc_idx, ds_desc_idx; - uint8_t ds; - int ret; - off_t secbias; + struct vioblk_queue *queue = arg; + struct vioblk_dev *dev = queue->dev; + uint16_t cmd_desc_idx, desc_idx; char *vr; - struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc; + struct vring_desc *desc, *cmd_desc; struct vring_avail *avail; struct vring_used *used; struct virtio_blk_req_hdr cmd; + unsigned int prod, cons; + uint8_t ds; - ret = 0; - - /* Invalid queue? */ - if (dev->cfg.queue_notify > 0) - return (0); - - vr_sz = vring_size(VIOBLK_QUEUE_SIZE); - q_gpa = dev->vq[dev->cfg.queue_notify].qa; - q_gpa = q_gpa * VIRTIO_PAGE_SIZE; - - vr = calloc(1, vr_sz); - if (vr == NULL) { - log_warn("calloc error getting vioblk ring"); - return (0); - } - - if (read_mem(q_gpa, vr, vr_sz)) { - log_warnx("error reading gpa 0x%llx", q_gpa); - goto out; - } + vr = queue->ring; + if (vr == NULL) + return; /* Compute offsets in ring of descriptors, avail ring, and used ring */ desc = (struct vring_desc *)(vr); - avail = (struct vring_avail *)(vr + - dev->vq[dev->cfg.queue_notify].vq_availoffset); - used = (struct vring_used *)(vr + - dev->vq[dev->cfg.queue_notify].vq_usedoffset); + avail = (struct vring_avail *)(vr + queue->vq.vq_availoffset); + used = (struct vring_used *)(vr + queue->vq.vq_usedoffset); - idx = dev->vq[dev->cfg.queue_notify].last_avail & VIOBLK_QUEUE_MASK; + cons = queue->vq.last_avail & VIOBLK_QUEUE_MASK; + prod = avail->idx & VIOBLK_QUEUE_MASK; - if ((avail->idx & VIOBLK_QUEUE_MASK) == idx) { - log_warnx("vioblk queue notify - nothing to do?"); - goto out; - } - - while (idx != (avail->idx & VIOBLK_QUEUE_MASK)) { + if (cons == prod) + return; - cmd_desc_idx = avail->ring[idx] & VIOBLK_QUEUE_MASK; + membar_consumer(); + do { + cmd_desc_idx = avail->ring[cons] & VIOBLK_QUEUE_MASK; cmd_desc = &desc[cmd_desc_idx]; - if ((cmd_desc->flags & VRING_DESC_F_NEXT) == 0) { + if (!ISSET(cmd_desc->flags, VRING_DESC_F_NEXT)) { log_warnx("unchained vioblk cmd descriptor received " "(idx %d)", cmd_desc_idx); - goto out; + break; } /* Read command from descriptor ring */ if (read_mem(cmd_desc->addr, &cmd, cmd_desc->len)) { log_warnx("vioblk: command read_mem error @ 0x%llx", cmd_desc->addr); - goto out; + break; } switch (cmd.type) { case VIRTIO_BLK_T_IN: - /* first descriptor */ - secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; - secdata_desc = &desc[secdata_desc_idx]; - - if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) { - log_warnx("unchained vioblk data descriptor " - "received (idx %d)", cmd_desc_idx); - goto out; - } - - secbias = 0; - do { - struct ioinfo *info; - const uint8_t *secdata; - - info = vioblk_start_read(dev, cmd.sector + secbias, - (ssize_t)secdata_desc->len); - - /* read the data (use current data descriptor) */ - secdata = vioblk_finish_read(info); - if (secdata == NULL) { - vioblk_free_info(info); - log_warnx("vioblk: block read error, " - "sector %lld", cmd.sector); - goto out; - } - - if (write_mem(secdata_desc->addr, secdata, - secdata_desc->len)) { - log_warnx("can't write sector " - "data to gpa @ 0x%llx", - secdata_desc->addr); - dump_descriptor_chain(desc, cmd_desc_idx); - vioblk_free_info(info); - goto out; - } - - vioblk_free_info(info); - - secbias += (secdata_desc->len / VIRTIO_BLK_SECTOR_SIZE); - secdata_desc_idx = secdata_desc->next & - VIOBLK_QUEUE_MASK; - secdata_desc = &desc[secdata_desc_idx]; - } while (secdata_desc->flags & VRING_DESC_F_NEXT); - - ds_desc_idx = secdata_desc_idx; - ds_desc = secdata_desc; - - ds = VIRTIO_BLK_S_OK; - if (write_mem(ds_desc->addr, &ds, ds_desc->len)) { - log_warnx("can't write device status data @ " - "0x%llx", ds_desc->addr); - dump_descriptor_chain(desc, cmd_desc_idx); - goto out; - } - - ret = 1; - dev->cfg.isr_status = 1; - used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx; - used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len; - used->idx++; - - dev->vq[dev->cfg.queue_notify].last_avail = avail->idx & - VIOBLK_QUEUE_MASK; + if (vioblk_io(dev, preadv, &cmd, cmd_desc, desc) != 0) + goto fail; - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("vioblk: error writing vio ring"); - } break; case VIRTIO_BLK_T_OUT: - secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; - secdata_desc = &desc[secdata_desc_idx]; - - if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) { - log_warnx("wr vioblk: unchained vioblk data " - "descriptor received (idx %d)", - cmd_desc_idx); - goto out; - } - - if (secdata_desc->len > dev->max_xfer) { - log_warnx("%s: invalid read size %d requested", - __func__, secdata_desc->len); - goto out; - } - - secbias = 0; - do { - struct ioinfo *info; - - info = vioblk_start_write(dev, cmd.sector + secbias, - secdata_desc->addr, secdata_desc->len); - - if (info == NULL) { - log_warnx("wr vioblk: can't read " - "sector data @ 0x%llx", - secdata_desc->addr); - dump_descriptor_chain(desc, - cmd_desc_idx); - goto out; - } - - if (vioblk_finish_write(info)) { - log_warnx("wr vioblk: disk write " - "error"); - vioblk_free_info(info); - goto out; - } - - vioblk_free_info(info); - - secbias += secdata_desc->len / - VIRTIO_BLK_SECTOR_SIZE; - - secdata_desc_idx = secdata_desc->next & - VIOBLK_QUEUE_MASK; - secdata_desc = &desc[secdata_desc_idx]; - } while (secdata_desc->flags & VRING_DESC_F_NEXT); - - ds_desc_idx = secdata_desc_idx; - ds_desc = secdata_desc; - - ds = VIRTIO_BLK_S_OK; - if (write_mem(ds_desc->addr, &ds, ds_desc->len)) { - log_warnx("wr vioblk: can't write device " - "status data @ 0x%llx", ds_desc->addr); - dump_descriptor_chain(desc, cmd_desc_idx); - goto out; - } + if (vioblk_io(dev, pwritev, &cmd, cmd_desc, desc) != 0) + goto fail; - ret = 1; - dev->cfg.isr_status = 1; - used->ring[used->idx & VIOBLK_QUEUE_MASK].id = - cmd_desc_idx; - used->ring[used->idx & VIOBLK_QUEUE_MASK].len = - cmd_desc->len; - used->idx++; - - dev->vq[dev->cfg.queue_notify].last_avail = avail->idx & - VIOBLK_QUEUE_MASK; - if (write_mem(q_gpa, vr, vr_sz)) - log_warnx("wr vioblk: error writing vio ring"); break; + case VIRTIO_BLK_T_FLUSH: case VIRTIO_BLK_T_FLUSH_OUT: - ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; - ds_desc = &desc[ds_desc_idx]; - ds = VIRTIO_BLK_S_OK; - if (write_mem(ds_desc->addr, &ds, ds_desc->len)) { - log_warnx("fl vioblk: can't write device status " - "data @ 0x%llx", ds_desc->addr); - dump_descriptor_chain(desc, cmd_desc_idx); - goto out; - } + if (fsync(dev->fd) == -1) + ds = VIRTIO_BLK_S_IOERR; - ret = 1; - dev->cfg.isr_status = 1; - used->ring[used->idx & VIOBLK_QUEUE_MASK].id = - cmd_desc_idx; - used->ring[used->idx & VIOBLK_QUEUE_MASK].len = - cmd_desc->len; - used->idx++; + desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; + desc = &desc[desc_idx]; - dev->vq[dev->cfg.queue_notify].last_avail = avail->idx & - VIOBLK_QUEUE_MASK; - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("fl vioblk: error writing vio ring"); - } + vioblk_complete(desc, ds); break; + default: log_warnx("%s: unsupported command 0x%x", __func__, cmd.type); - ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; - ds_desc = &desc[ds_desc_idx]; + vioblk_io_skip(desc, cmd_desc); + break; + } - ds = VIRTIO_BLK_S_UNSUPP; - if (write_mem(ds_desc->addr, &ds, ds_desc->len)) { - log_warnx("%s: get id : can't write device " - "status data @ 0x%llx", __func__, - ds_desc->addr); - dump_descriptor_chain(desc, cmd_desc_idx); - goto out; - } + used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx; + used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len; + membar_producer(); + used->idx++; - ret = 1; - dev->cfg.isr_status = 1; - used->ring[used->idx & VIOBLK_QUEUE_MASK].id = - cmd_desc_idx; - used->ring[used->idx & VIOBLK_QUEUE_MASK].len = - cmd_desc->len; - used->idx++; + cons++; + cons &= VIOBLK_QUEUE_MASK; + } while (cons != prod); - dev->vq[dev->cfg.queue_notify].last_avail = avail->idx & - VIOBLK_QUEUE_MASK; - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("%s: get id : error writing vio ring", - __func__); - } - break; - } +fail: + queue->vq.last_avail = cons; - idx = (idx + 1) & VIOBLK_QUEUE_MASK; - } -out: - free(vr); - return (ret); + vcpu_assert_pic_irq(dev->vm_id, 0, dev->irq); + + dev->cfg.isr_status = 1; } int @@ -737,8 +585,7 @@ virtio_blk_io(int dir, uint16_t reg, uin break; case VIRTIO_CONFIG_QUEUE_NOTIFY: dev->cfg.queue_notify = *data; - if (vioblk_notifyq(dev)) - *intr = 1; + task_add(iotq, &dev->q[0].t); break; case VIRTIO_CONFIG_DEVICE_STATUS: dev->cfg.device_status = *data; @@ -752,7 +599,7 @@ virtio_blk_io(int dir, uint16_t reg, uin dev->cfg.queue_select = 0; dev->cfg.queue_notify = 0; dev->cfg.isr_status = 0; - dev->vq[0].last_avail = 0; + dev->q[0].vq.last_avail = 0; } break; default: @@ -1710,6 +1557,10 @@ virtio_init(struct vmd_vm *vm, int *chil return; } + iotq = taskq_create("iotq"); + if (iotq == NULL) + fatalx("unable to create io taskq"); + /* One virtio block device for each disk defined in vcp */ for (i = 0; i < vcp->vcp_ndisks; i++) { if ((sz = lseek(child_disks[i], 0, SEEK_END)) == -1) @@ -1731,18 +1582,26 @@ virtio_init(struct vmd_vm *vm, int *chil "device", __progname); return; } - vioblk[i].vq[0].qs = VIOBLK_QUEUE_SIZE; - vioblk[i].vq[0].vq_availoffset = + + vioblk[i].q[0].dev = &vioblk[i]; + vioblk[i].q[0].ring = NULL; + vioblk[i].q[0].vq.qs = VIOBLK_QUEUE_SIZE; + vioblk[i].q[0].vq.vq_availoffset = sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE; - vioblk[i].vq[0].vq_usedoffset = VIRTQUEUE_ALIGN( + vioblk[i].q[0].vq.vq_usedoffset = VIRTQUEUE_ALIGN( sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE)); - vioblk[i].vq[0].last_avail = 0; + vioblk[i].q[0].vq.last_avail = 0; vioblk[i].fd = child_disks[i]; vioblk[i].sz = sz / 512; vioblk[i].cfg.device_feature = VIRTIO_BLK_F_SIZE_MAX; vioblk[i].max_xfer = 1048576; + + vioblk[i].vm_id = vcp->vcp_id; + vioblk[i].irq = pci_get_dev_irq(id); vioblk[i].pci_id = id; + + task_set(&vioblk[i].q[0].t, vioblk_q, &vioblk[i].q[0]); } } Index: virtio.h =================================================================== RCS file: /cvs/src/usr.sbin/vmd/virtio.h,v retrieving revision 1.21 diff -u -p -r1.21 virtio.h --- virtio.h 17 Sep 2017 23:07:56 -0000 1.21 +++ virtio.h 18 Sep 2017 00:39:00 -0000 @@ -99,18 +99,6 @@ struct viornd_dev { uint8_t pci_id; }; -struct vioblk_dev { - struct virtio_io_cfg cfg; - - struct virtio_vq_info vq[VIRTIO_MAX_QUEUES]; - - int fd; - uint64_t sz; - uint32_t max_xfer; - - uint8_t pci_id; -}; - struct vionet_dev { pthread_mutex_t mutex; struct event event; @@ -179,13 +167,6 @@ int viornd_restore(int); void viornd_update_qs(void); void viornd_update_qa(void); int viornd_notifyq(void); - -int virtio_blk_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t); -int vioblk_dump(int); -int vioblk_restore(int, struct vm_create_params *, int *); -void vioblk_update_qs(struct vioblk_dev *); -void vioblk_update_qa(struct vioblk_dev *); -int vioblk_notifyq(struct vioblk_dev *); int virtio_net_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t); int vionet_dump(int); Index: vm.c =================================================================== RCS file: /cvs/src/usr.sbin/vmd/vm.c,v retrieving revision 1.27 diff -u -p -r1.27 vm.c --- vm.c 17 Sep 2017 23:07:56 -0000 1.27 +++ vm.c 18 Sep 2017 00:39:00 -0000 @@ -1578,7 +1578,7 @@ vaddr_mem(paddr_t gpa, size_t len) if (gpa < vmr->vmr_gpa) continue; - if (gpend >= vmr->vmr_gpa + vmr->vmr_size) + if (gpend > vmr->vmr_gpa + vmr->vmr_size) continue; return ((char *)vmr->vmr_va + (gpa - vmr->vmr_gpa));