Index: sys/intrmap.h =================================================================== RCS file: sys/intrmap.h diff -N sys/intrmap.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/intrmap.h 10 Jun 2020 08:19:11 -0000 @@ -0,0 +1,38 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2020 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _SYS_INTRMAP_H_ +#define _SYS_INTRMAP_H_ + +struct intrmap; + +#define INTRMAP_POWEROF2 (1 << 0) + +struct intrmap *intrmap_create(const struct device *, + unsigned int, unsigned int, unsigned int); +void intrmap_destroy(struct intrmap *); + +void intrmap_match(const struct device *, + struct intrmap *, struct intrmap *); +void intrmap_align(const struct device *, + struct intrmap *, struct intrmap *); + +unsigned int intrmap_count(const struct intrmap *); +unsigned int intrmap_cpu(const struct intrmap *, unsigned int); + +#endif /* _SYS_INTRMAP_H_ */ Index: kern/kern_intrmap.c =================================================================== RCS file: kern/kern_intrmap.c diff -N kern/kern_intrmap.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ kern/kern_intrmap.c 10 Jun 2020 08:19:10 -0000 @@ -0,0 +1,245 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 1980, 1986, 1993 + * The Regents of the University of California. 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)if.c 8.3 (Berkeley) 1/4/94 + * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $ + */ + +/* + * This code is adapted from the if_ringmap code in DragonflyBSD, + * but generalised for use by all types of devices, not just network + * cards. 
+ */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/device.h> + +#include <sys/intrmap.h> + +#define intrmap_ncpu ncpus + +struct intrmap { + unsigned int im_count; + unsigned int im_grid; + unsigned int *im_cpumap; +}; + +static int +intrmap_nintrs(unsigned int nintrs, unsigned int maxintrs) +{ + KASSERTMSG(maxintrs > 0, "invalid maximum interrupt count %u", + maxintrs); + + if (nintrs == 0 || nintrs > maxintrs) + nintrs = maxintrs; + if (nintrs > intrmap_ncpu) + nintrs = intrmap_ncpu; + return (nintrs); +} + +static void +intrmap_set_grid(unsigned int unit, struct intrmap *im, unsigned int grid) +{ + unsigned int i, offset; + unsigned int *cpumap = im->im_cpumap; + + KASSERTMSG(grid > 0, "invalid if_ringmap grid %u", grid); + KASSERTMSG(grid >= im->im_count, "invalid intrmap grid %u, count %u", + grid, im->im_count); + im->im_grid = grid; + + offset = (grid * unit) % intrmap_ncpu; + for (i = 0; i < im->im_count; i++) { + cpumap[i] = offset + i; + KASSERTMSG(cpumap[i] < intrmap_ncpu, + "invalid cpumap[%u] = %u, offset %u (ncpu %d)", i, + cpumap[i], offset, intrmap_ncpu); + } +} + +struct intrmap * +intrmap_create(const struct device *dv, + unsigned int nintrs, unsigned int maxintrs, unsigned int flags) +{ + struct intrmap *im; + unsigned int unit = dv->dv_unit; + unsigned int i, grid = 0, prev_grid; + + nintrs = intrmap_nintrs(nintrs, maxintrs); + if (ISSET(flags, INTRMAP_POWEROF2)) + nintrs = 1 << (fls(nintrs) - 1); + im = malloc(sizeof(*im), M_DEVBUF, M_WAITOK | M_ZERO); + im->im_count = nintrs; + im->im_cpumap = mallocarray(nintrs, sizeof(*im->im_cpumap), M_DEVBUF, + M_WAITOK | M_ZERO); + + prev_grid = intrmap_ncpu; + for (i = 0; i < intrmap_ncpu; i++) { + if (intrmap_ncpu % (i + 1) != 0) + continue; + + grid = intrmap_ncpu / (i + 1); + if (nintrs > grid) { + grid = prev_grid; + break; + } + + if (nintrs > intrmap_ncpu / (i + 2)) + break; + prev_grid = grid; + } + intrmap_set_grid(unit, im, grid); + + return (im); +} + +void +intrmap_destroy(struct intrmap *im) +{ + free(im->im_cpumap, 
M_DEVBUF, im->im_count * sizeof(*im->im_cpumap)); + free(im, M_DEVBUF, sizeof(*im)); +} + +/* + * Align the two ringmaps. + * + * e.g. 8 netisrs, rm0 contains 4 rings, rm1 contains 2 rings. + * + * Before: + * + * CPU 0 1 2 3 4 5 6 7 + * NIC_RX n0 n1 n2 n3 + * NIC_TX N0 N1 + * + * After: + * + * CPU 0 1 2 3 4 5 6 7 + * NIC_RX n0 n1 n2 n3 + * NIC_TX N0 N1 + */ +void +intrmap_align(const struct device *dv, + struct intrmap *im0, struct intrmap *im1) +{ + unsigned int unit = dv->dv_unit; + + if (im0->im_grid > im1->im_grid) + intrmap_set_grid(unit, im1, im0->im_grid); + else if (im0->im_grid < im1->im_grid) + intrmap_set_grid(unit, im0, im1->im_grid); +} + +void +intrmap_match(const struct device *dv, + struct intrmap *im0, struct intrmap *im1) +{ + unsigned int unit = dv->dv_unit; + unsigned int subset_grid, cnt, divisor, mod, offset, i; + struct intrmap *subset_im, *im; + unsigned int old_im0_grid, old_im1_grid; + + if (im0->im_grid == im1->im_grid) + return; + + /* Save grid for later use */ + old_im0_grid = im0->im_grid; + old_im1_grid = im1->im_grid; + + intrmap_align(dv, im0, im1); + + /* + * Re-shuffle rings to get more even distribution. + * + * e.g. 12 netisrs, rm0 contains 4 rings, rm1 contains 2 rings. + * + * CPU 0 1 2 3 4 5 6 7 8 9 10 11 + * + * NIC_RX a0 a1 a2 a3 b0 b1 b2 b3 c0 c1 c2 c3 + * NIC_TX A0 A1 B0 B1 C0 C1 + * + * NIC_RX d0 d1 d2 d3 e0 e1 e2 e3 f0 f1 f2 f3 + * NIC_TX D0 D1 E0 E1 F0 F1 + */ + + if (im0->im_count >= (2 * old_im1_grid)) { + cnt = im0->im_count; + subset_grid = old_im1_grid; + subset_im = im1; + im = im0; + } else if (im1->im_count > (2 * old_im0_grid)) { + cnt = im1->im_count; + subset_grid = old_im0_grid; + subset_im = im0; + im = im1; + } else { + /* No space to shuffle. 
*/ + return; + } + + mod = cnt / subset_grid; + KASSERT(mod >= 2); + divisor = intrmap_ncpu / im->im_grid; + offset = ((unit / divisor) % mod) * subset_grid; + + for (i = 0; i < subset_im->im_count; i++) { + subset_im->im_cpumap[i] += offset; + KASSERTMSG(subset_im->im_cpumap[i] < intrmap_ncpu, + "match: invalid cpumap[%d] = %d, offset %d", + i, subset_im->im_cpumap[i], offset); + } +#ifdef DIAGNOSTIC + for (i = 0; i < subset_im->im_count; i++) { + unsigned int j; + + for (j = 0; j < im->im_count; j++) { + if (im->im_cpumap[j] == subset_im->im_cpumap[i]) + break; + } + KASSERTMSG(j < im->im_count, + "subset cpumap[%u] = %u not found in superset", + i, subset_im->im_cpumap[i]); + } +#endif +} + +unsigned int +intrmap_count(const struct intrmap *im) +{ + return (im->im_count); +} + +unsigned int +intrmap_cpu(const struct intrmap *im, unsigned int ring) +{ + KASSERTMSG(ring >= 0 && ring < im->im_count, "invalid ring %u", ring); + return (im->im_cpumap[ring]); +} Index: dev/pci/if_vmx.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_vmx.c,v retrieving revision 1.56 diff -u -p -r1.56 if_vmx.c --- dev/pci/if_vmx.c 28 May 2020 07:21:56 -0000 1.56 +++ dev/pci/if_vmx.c 10 Jun 2020 08:19:09 -0000 @@ -17,6 +17,7 @@ */ #include "bpfilter.h" +#include "kstat.h" #include #include @@ -25,8 +26,11 @@ #include #include #include +#include +#include #include +#include #include #include @@ -42,7 +46,7 @@ #include #include -#define VMX_MAX_QUEUES 1 +#define VMX_MAX_QUEUES MIN(VMXNET3_MAX_TX_QUEUES, VMXNET3_MAX_RX_QUEUES) #define NTXDESC 512 /* tx ring size */ #define NTXSEGS 8 /* tx descriptors per packet */ @@ -90,26 +94,67 @@ struct vmxnet3_comp_ring { u_int32_t gen; }; +struct vmx_txstats_kv { + struct kstat_kv tso_packets; + struct kstat_kv tso_bytes; + struct kstat_kv ucast_packets; + struct kstat_kv ucast_bytes; + struct kstat_kv mcast_packets; + struct kstat_kv mcast_bytes; + struct kstat_kv bcast_packets; + struct kstat_kv 
bcast_bytes; + struct kstat_kv errors; + struct kstat_kv discards; +}; + struct vmxnet3_txqueue { + struct vmxnet3_softc *sc; /* sigh */ struct vmxnet3_txring cmd_ring; struct vmxnet3_comp_ring comp_ring; struct vmxnet3_txq_shared *ts; struct ifqueue *ifq; + + struct kstat *txkstat; +}; + +struct vmx_rxstats_kv { + struct kstat_kv lro_packets; + struct kstat_kv lro_bytes; + struct kstat_kv ucast_packets; + struct kstat_kv ucast_bytes; + struct kstat_kv mcast_packets; + struct kstat_kv mcast_bytes; + struct kstat_kv bcast_packets; + struct kstat_kv bcast_bytes; + struct kstat_kv nobuffers; + struct kstat_kv errors; }; struct vmxnet3_rxqueue { + struct vmxnet3_softc *sc; /* sigh */ struct vmxnet3_rxring cmd_ring[2]; struct vmxnet3_comp_ring comp_ring; struct vmxnet3_rxq_shared *rs; struct ifiqueue *ifiq; + + struct kstat *rxkstat; }; struct vmxnet3_queue { struct vmxnet3_txqueue tx; struct vmxnet3_rxqueue rx; struct vmxnet3_softc *sc; - char intrname[8]; + char intrname[16]; + void *ih; int intr; +} __aligned(64); + +struct vmx_kstats { + struct rwlock lock; + struct timeval updated; + + struct vmx_txstats_kv txstats; + struct vmx_rxstats_kv rxstats; }; struct vmxnet3_softc { @@ -123,13 +168,19 @@ struct vmxnet3_softc { bus_space_handle_t sc_ioh1; bus_dma_tag_t sc_dmat; void *sc_ih; - void *sc_qih[VMX_MAX_QUEUES]; - int sc_nintr; + int sc_nqueues; + struct vmxnet3_queue *sc_q; + + struct intrmap *sc_intrmap; - struct vmxnet3_queue sc_q[VMX_MAX_QUEUES]; struct vmxnet3_driver_shared *sc_ds; u_int8_t *sc_mcast; + struct vmxnet3_upt1_rss_conf *sc_rss; + +#if NKSTAT > 0 + struct vmx_kstats sc_kstats; +#endif }; #define VMXNET3_STAT @@ -196,6 +247,14 @@ void vmxnet3_media_status(struct ifnet * int vmxnet3_media_change(struct ifnet *); void *vmxnet3_dma_allocmem(struct vmxnet3_softc *, u_int, u_int, bus_addr_t *); +#if NKSTAT > 0 +static void vmx_kstat_init(struct vmxnet3_softc *); +static void vmx_kstat_txstats(struct vmxnet3_softc *, + struct vmxnet3_txqueue *, int); 
+static void vmx_kstat_rxstats(struct vmxnet3_softc *, + struct vmxnet3_rxqueue *, int); +#endif + const struct pci_matchid vmx_devices[] = { { PCI_VENDOR_VMWARE, PCI_PRODUCT_VMWARE_NET_3 } }; @@ -225,6 +284,7 @@ vmxnet3_attach(struct device *parent, st u_int memtype, ver, macl, mach, intrcfg; u_char enaddr[ETHER_ADDR_LEN]; int (*isr)(void *); + int msix = 0; int i; memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, 0x10); @@ -249,7 +309,7 @@ vmxnet3_attach(struct device *parent, st ver = READ_BAR1(sc, VMXNET3_BAR1_UVRS); if ((ver & 0x1) == 0) { - printf(": incompatiable UPT version 0x%x\n", ver); + printf(": incompatible UPT version 0x%x\n", ver); return; } WRITE_BAR1(sc, VMXNET3_BAR1_UVRS, 1); @@ -259,18 +319,26 @@ vmxnet3_attach(struct device *parent, st WRITE_CMD(sc, VMXNET3_CMD_GET_INTRCFG); intrcfg = READ_BAR1(sc, VMXNET3_BAR1_CMD); isr = vmxnet3_intr; - sc->sc_nintr = 0; sc->sc_nqueues = 1; switch (intrcfg & VMXNET3_INTRCFG_TYPE_MASK) { case VMXNET3_INTRCFG_TYPE_AUTO: case VMXNET3_INTRCFG_TYPE_MSIX: - if (pci_intr_map_msix(pa, 0, &ih) == 0) { - isr = vmxnet3_intr_event; - sc->sc_nintr = sc->sc_nqueues + 1; - break; + msix = pci_intr_msix_count(pa->pa_pc, pa->pa_tag); + if (msix > 0) { + if (pci_intr_map_msix(pa, 0, &ih) == 0) { + msix--; /* are there spares for tx/rx qs? 
*/ + if (msix > 0) { + isr = vmxnet3_intr_event; + sc->sc_intrmap = + intrmap_create(&sc->sc_dev, + msix, VMX_MAX_QUEUES, 0); + sc->sc_nqueues = + intrmap_count(sc->sc_intrmap); + } + break; + } } - /* FALLTHROUGH */ case VMXNET3_INTRCFG_TYPE_MSI: if (pci_intr_map_msi(pa, &ih) == 0) @@ -291,22 +359,27 @@ vmxnet3_attach(struct device *parent, st if (intrstr) printf(": %s", intrstr); - if (sc->sc_nintr > 1) { + sc->sc_q = mallocarray(sc->sc_nqueues, sizeof(*sc->sc_q), + M_DEVBUF, M_WAITOK|M_ZERO); + if (sc->sc_intrmap != NULL) { for (i = 0; i < sc->sc_nqueues; i++) { struct vmxnet3_queue *q; int vec; + int cpu; q = &sc->sc_q[i]; vec = i + 1; + cpu = intrmap_cpu(sc->sc_intrmap, i); + if (pci_intr_map_msix(pa, vec, &ih) != 0) { printf(", failed to map interrupt %d\n", vec); return; } - snprintf(q->intrname, sizeof(q->intrname), "%s:%d", - self->dv_xname, i); - sc->sc_qih[i] = pci_intr_establish(pa->pa_pc, ih, + snprintf(q->intrname, sizeof(q->intrname), "%s:%d:%d", + self->dv_xname, i, cpu); + q->ih = pci_intr_establish_cpuid(pa->pa_pc, ih, IPL_NET | IPL_MPSAFE, vmxnet3_intr_queue, q, - q->intrname); + q->intrname, cpu); q->intr = vec; q->sc = sc; @@ -367,12 +440,21 @@ vmxnet3_attach(struct device *parent, st ether_ifattach(ifp); vmxnet3_link_state(sc); +#if NKSTAT > 0 + vmx_kstat_init(sc); +#endif + if_attach_queues(ifp, sc->sc_nqueues); if_attach_iqueues(ifp, sc->sc_nqueues); for (i = 0; i < sc->sc_nqueues; i++) { ifp->if_ifqs[i]->ifq_softc = &sc->sc_q[i].tx; sc->sc_q[i].tx.ifq = ifp->if_ifqs[i]; sc->sc_q[i].rx.ifiq = ifp->if_iqs[i]; + +#if NKSTAT > 0 + vmx_kstat_txstats(sc, &sc->sc_q[i].tx, i); + vmx_kstat_rxstats(sc, &sc->sc_q[i].rx, i); +#endif } } @@ -397,10 +479,7 @@ vmxnet3_dma_init(struct vmxnet3_softc *s sc->sc_q[queue].rx.rs = rs++; for (queue = 0; queue < sc->sc_nqueues; queue++) { - if (sc->sc_nintr > 0) - intr = queue + 1; - else - intr = 0; + intr = sc->sc_q[queue].intr; if (vmxnet3_alloc_txring(sc, queue, intr)) return -1; @@ -451,11 +530,38 @@ 
vmxnet3_dma_init(struct vmxnet3_softc *s ds->nrxqueue = sc->sc_nqueues; ds->mcast_table = mcast_pa; ds->automask = 1; - ds->nintr = sc->sc_nintr; + ds->nintr = 1 + (sc->sc_intrmap != NULL ? sc->sc_nqueues : 0); ds->evintr = 0; ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL; - for (i = 0; i < sc->sc_nintr; i++) + for (i = 0; i < ds->nintr; i++) ds->modlevel[i] = UPT1_IMOD_ADAPTIVE; + + if (sc->sc_nqueues > 1) { + struct vmxnet3_upt1_rss_conf *rsscfg; + bus_addr_t rss_pa; + + rsscfg = vmxnet3_dma_allocmem(sc, sizeof(*rsscfg), 8, &rss_pa); + + rsscfg->hash_type = UPT1_RSS_HASH_TYPE_TCP_IPV4 | + UPT1_RSS_HASH_TYPE_IPV4 | + UPT1_RSS_HASH_TYPE_TCP_IPV6 | + UPT1_RSS_HASH_TYPE_IPV6; + rsscfg->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ; + rsscfg->hash_key_size = sizeof(rsscfg->hash_key); + stoeplitz_to_key(rsscfg->hash_key, sizeof(rsscfg->hash_key)); + + rsscfg->ind_table_size = sizeof(rsscfg->ind_table); + for (i = 0; i < sizeof(rsscfg->ind_table); i++) + rsscfg->ind_table[i] = i % sc->sc_nqueues; + + ds->upt_features |= UPT1_F_RSS; + ds->rss.version = 1; + ds->rss.len = sizeof(*rsscfg); + ds->rss.paddr = rss_pa; + + sc->sc_rss = rsscfg; + } + WRITE_BAR1(sc, VMXNET3_BAR1_DSL, ds_pa); WRITE_BAR1(sc, VMXNET3_BAR1_DSH, (u_int64_t)ds_pa >> 32); return 0; @@ -635,6 +741,157 @@ vmxnet3_rxfill(struct vmxnet3_rxring *ri timeout_add(&ring->refill, 1); } +#if NKSTAT > 0 +static const struct timeval vmx_kstat_rate = { 1, 0 }; + +static void +vmx_kstat_init(struct vmxnet3_softc *sc) +{ + struct vmx_txstats_kv *txkvs = &sc->sc_kstats.txstats; + struct vmx_rxstats_kv *rxkvs = &sc->sc_kstats.rxstats; + + rw_init(&sc->sc_kstats.lock, "vmxstats"); + + kstat_kv_unit_init(&txkvs->tso_packets, "TSO packets", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS); + kstat_kv_unit_init(&txkvs->tso_bytes, "TSO bytes", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES); + kstat_kv_unit_init(&txkvs->ucast_packets, "ucast packets", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS); + kstat_kv_unit_init(&txkvs->ucast_bytes, "ucast 
bytes", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES); + kstat_kv_unit_init(&txkvs->mcast_packets, "mcast packets", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS); + kstat_kv_unit_init(&txkvs->mcast_bytes, "mcast bytes", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES); + kstat_kv_unit_init(&txkvs->bcast_packets, "bcast packets", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS); + kstat_kv_unit_init(&txkvs->bcast_bytes, "bcast bytes", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES); + kstat_kv_unit_init(&txkvs->errors, "errors", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS); + kstat_kv_unit_init(&txkvs->discards, "discards", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS); + + kstat_kv_unit_init(&rxkvs->lro_packets, "LRO packets", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS); + kstat_kv_unit_init(&rxkvs->lro_bytes, "LRO bytes", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES); + kstat_kv_unit_init(&rxkvs->ucast_packets, "ucast packets", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS); + kstat_kv_unit_init(&rxkvs->ucast_bytes, "ucast bytes", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES); + kstat_kv_unit_init(&rxkvs->mcast_packets, "mcast packets", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS); + kstat_kv_unit_init(&rxkvs->mcast_bytes, "mcast bytes", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES); + kstat_kv_unit_init(&rxkvs->bcast_packets, "bcast packets", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS); + kstat_kv_unit_init(&rxkvs->bcast_bytes, "bcast bytes", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES); + kstat_kv_unit_init(&rxkvs->nobuffers, "no buffers", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS); + kstat_kv_unit_init(&rxkvs->errors, "errors", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS); +} + +static int +vmx_txstats_read(struct kstat *ks) +{ + struct vmxnet3_txqueue *tq = ks->ks_softc; + struct vmxnet3_softc *sc = tq->sc; + struct vmx_txstats_kv *txkvs = ks->ks_data; + struct UPT1_TxStats *txstats = &tq->ts->stats; + + if (ratecheck(&sc->sc_kstats.updated, &vmx_kstat_rate)) + WRITE_CMD(sc, VMXNET3_CMD_GET_STATS); + + 
txkvs->tso_packets.kv_v.v_u64 = txstats->TSO_packets; + txkvs->tso_bytes.kv_v.v_u64 = txstats->TSO_bytes; + txkvs->ucast_packets.kv_v.v_u64 = txstats->ucast_packets; + txkvs->ucast_bytes.kv_v.v_u64 = txstats->ucast_bytes; + txkvs->mcast_packets.kv_v.v_u64 = txstats->mcast_packets; + txkvs->mcast_bytes.kv_v.v_u64 = txstats->mcast_bytes; + txkvs->bcast_packets.kv_v.v_u64 = txstats->bcast_packets; + txkvs->bcast_bytes.kv_v.v_u64 = txstats->bcast_bytes; + txkvs->errors.kv_v.v_u64 = txstats->error; + txkvs->discards.kv_v.v_u64 = txstats->discard; + + TIMEVAL_TO_TIMESPEC(&sc->sc_kstats.updated, &ks->ks_updated); + + return (0); +} + +static void +vmx_kstat_txstats(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq, int i) +{ + tq->sc = sc; + + tq->txkstat = kstat_create(sc->sc_dev.dv_xname, 0, "vmx-txstats", i, + KSTAT_T_KV, 0); + if (tq->txkstat == NULL) + return; + + kstat_set_wlock(tq->txkstat, &sc->sc_kstats.lock); + + tq->txkstat->ks_softc = tq; + tq->txkstat->ks_data = &sc->sc_kstats.txstats; + tq->txkstat->ks_datalen = sizeof(sc->sc_kstats.txstats); + tq->txkstat->ks_read = vmx_txstats_read; + TIMEVAL_TO_TIMESPEC(&vmx_kstat_rate, &tq->txkstat->ks_interval); + + kstat_install(tq->txkstat); +} + +static int +vmx_rxstats_read(struct kstat *ks) +{ + struct vmxnet3_rxqueue *rq = ks->ks_softc; + struct vmxnet3_softc *sc = rq->sc; + struct vmx_rxstats_kv *rxkvs = ks->ks_data; + struct UPT1_RxStats *rxstats = &rq->rs->stats; + + if (ratecheck(&sc->sc_kstats.updated, &vmx_kstat_rate)) + WRITE_CMD(sc, VMXNET3_CMD_GET_STATS); + + rxkvs->lro_packets.kv_v.v_u64 = rxstats->LRO_packets; + rxkvs->lro_bytes.kv_v.v_u64 = rxstats->LRO_bytes; + rxkvs->ucast_packets.kv_v.v_u64 = rxstats->ucast_packets; + rxkvs->ucast_bytes.kv_v.v_u64 = rxstats->ucast_bytes; + rxkvs->mcast_packets.kv_v.v_u64 = rxstats->mcast_packets; + rxkvs->mcast_bytes.kv_v.v_u64 = rxstats->mcast_bytes; + rxkvs->bcast_packets.kv_v.v_u64 = rxstats->bcast_packets; + rxkvs->bcast_bytes.kv_v.v_u64 = 
rxstats->bcast_bytes; + rxkvs->nobuffers.kv_v.v_u64 = rxstats->nobuffer; + rxkvs->errors.kv_v.v_u64 = rxstats->error; + + TIMEVAL_TO_TIMESPEC(&sc->sc_kstats.updated, &ks->ks_updated); + + return (0); +} + +static void +vmx_kstat_rxstats(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq, int i) +{ + rq->sc = sc; + + rq->rxkstat = kstat_create(sc->sc_dev.dv_xname, 0, "vmx-rxstats", i, + KSTAT_T_KV, 0); + if (rq->rxkstat == NULL) + return; + + kstat_set_wlock(rq->rxkstat, &rq->sc->sc_kstats.lock); + + rq->rxkstat->ks_softc = rq; + rq->rxkstat->ks_data = &sc->sc_kstats.rxstats; + rq->rxkstat->ks_datalen = sizeof(sc->sc_kstats.rxstats); + rq->rxkstat->ks_read = vmx_rxstats_read; + TIMEVAL_TO_TIMESPEC(&vmx_kstat_rate, &rq->rxkstat->ks_interval); + + kstat_install(rq->rxkstat); +} +#endif + void vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq) { @@ -737,8 +994,11 @@ vmxnet3_enable_all_intrs(struct vmxnet3_ int i; sc->sc_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL; - for (i = 0; i < sc->sc_nintr; i++) - vmxnet3_enable_intr(sc, i); + vmxnet3_enable_intr(sc, 0); + if (sc->sc_intrmap) { + for (i = 0; i < sc->sc_nqueues; i++) + vmxnet3_enable_intr(sc, sc->sc_q[i].intr); + } } void @@ -747,8 +1007,11 @@ vmxnet3_disable_all_intrs(struct vmxnet3 int i; sc->sc_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL; - for (i = 0; i < sc->sc_nintr; i++) - vmxnet3_disable_intr(sc, i); + vmxnet3_disable_intr(sc, 0); + if (sc->sc_intrmap) { + for (i = 0; i < sc->sc_nqueues; i++) + vmxnet3_disable_intr(sc, sc->sc_q[i].intr); + } } int @@ -965,6 +1228,10 @@ vmxnet3_rxintr(struct vmxnet3_softc *sc, m->m_pkthdr.ether_vtag = letoh32((rxcd->rxc_word2 >> VMXNET3_RXC_VLANTAG_S) & VMXNET3_RXC_VLANTAG_M); } + if (((letoh32(rxcd->rxc_word0) >> VMXNET3_RXC_RSSTYPE_S) & + VMXNET3_RXC_RSSTYPE_M) != VMXNET3_RXC_RSSTYPE_NONE) { + m->m_pkthdr.ph_flowid = letoh32(rxcd->rxc_word1); + } ml_enqueue(&ml, m); @@ -1087,12 +1354,11 @@ vmxnet3_stop(struct ifnet *ifp) WRITE_CMD(sc, VMXNET3_CMD_DISABLE); - if 
(sc->sc_nintr == 1) - intr_barrier(sc->sc_ih); - else { + if (sc->sc_intrmap) { for (queue = 0; queue < sc->sc_nqueues; queue++) - intr_barrier(sc->sc_qih[queue]); - } + intr_barrier(sc->sc_q[queue].ih); + } else + intr_barrier(sc->sc_ih); for (queue = 0; queue < sc->sc_nqueues; queue++) vmxnet3_txstop(sc, &sc->sc_q[queue].tx); Index: arch/amd64/amd64/acpi_machdep.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/acpi_machdep.c,v retrieving revision 1.90 diff -u -p -r1.90 acpi_machdep.c --- arch/amd64/amd64/acpi_machdep.c 12 Apr 2020 09:21:19 -0000 1.90 +++ arch/amd64/amd64/acpi_machdep.c 10 Jun 2020 08:19:08 -0000 @@ -195,7 +195,7 @@ acpi_intr_establish(int irq, int flags, type = (flags & LR_EXTIRQ_MODE) ? IST_EDGE : IST_LEVEL; return (intr_establish(-1, (struct pic *)apic, map->ioapic_pin, - type, level, handler, arg, what)); + type, level, NULL, handler, arg, what)); #else return NULL; #endif Index: arch/amd64/amd64/conf.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/conf.c,v retrieving revision 1.70 diff -u -p -r1.70 conf.c --- arch/amd64/amd64/conf.c 25 May 2020 06:37:52 -0000 1.70 +++ arch/amd64/amd64/conf.c 10 Jun 2020 08:19:08 -0000 @@ -142,6 +142,7 @@ cdev_decl(cy); #include "pctr.h" #include "bktr.h" #include "ksyms.h" +#include "kstat.h" #include "usb.h" #include "uhid.h" #include "fido.h" @@ -238,7 +239,7 @@ struct cdevsw cdevsw[] = cdev_notdef(), /* 48 */ cdev_bktr_init(NBKTR,bktr), /* 49: Bt848 video capture device */ cdev_ksyms_init(NKSYMS,ksyms), /* 50: Kernel symbols device */ - cdev_notdef(), /* 51 */ + cdev_kstat_init(NKSTAT,kstat), /* 51: Kernel statistics */ cdev_midi_init(NMIDI,midi), /* 52: MIDI I/O */ cdev_notdef(), /* 53 was: sequencer I/O */ cdev_notdef(), /* 54 was: RAIDframe disk driver */ Index: arch/amd64/amd64/intr.c =================================================================== RCS file: 
/cvs/src/sys/arch/amd64/amd64/intr.c,v retrieving revision 1.52 diff -u -p -r1.52 intr.c --- arch/amd64/amd64/intr.c 25 Mar 2019 18:45:27 -0000 1.52 +++ arch/amd64/amd64/intr.c 10 Jun 2020 08:19:08 -0000 @@ -282,13 +282,21 @@ duplicate: } else { other: /* - * Otherwise, look for a free slot elsewhere. Do the primary - * CPU first. + * Otherwise, look for a free slot elsewhere. If cip is null, it + * means try primary cpu but accept secondary, otherwise we need + * a slot on the requested cpu. */ - ci = &cpu_info_primary; + if (*cip == NULL) + ci = &cpu_info_primary; + else + ci = *cip; + error = intr_allocate_slot_cpu(ci, pic, pin, &slot); if (error == 0) goto found; + /* Can't alloc on the requested cpu, fail. */ + if (*cip != NULL) + return EBUSY; /* * ..now try the others. @@ -323,10 +331,9 @@ int intr_shared_edge; void * intr_establish(int legacy_irq, struct pic *pic, int pin, int type, int level, - int (*handler)(void *), void *arg, const char *what) + struct cpu_info *ci, int (*handler)(void *), void *arg, const char *what) { struct intrhand **p, *q, *ih; - struct cpu_info *ci; int slot, error, idt_vec; struct intrsource *source; struct intrstub *stubp; @@ -674,9 +681,10 @@ intr_printconfig(void) } void -intr_barrier(void *ih) +intr_barrier(void *cookie) { - sched_barrier(NULL); + struct intrhand *ih = cookie; + sched_barrier(ih->ih_cpu); } /* Index: arch/amd64/include/intr.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/intr.h,v retrieving revision 1.31 diff -u -p -r1.31 intr.h --- arch/amd64/include/intr.h 21 Dec 2018 01:51:07 -0000 1.31 +++ arch/amd64/include/intr.h 10 Jun 2020 08:19:08 -0000 @@ -201,8 +201,8 @@ void intr_calculatemasks(struct cpu_info int intr_allocate_slot_cpu(struct cpu_info *, struct pic *, int, int *); int intr_allocate_slot(struct pic *, int, int, int, struct cpu_info **, int *, int *); -void *intr_establish(int, struct pic *, int, int, int, int (*)(void *), - void *, 
const char *); +void *intr_establish(int, struct pic *, int, int, int, + struct cpu_info *, int (*)(void *), void *, const char *); void intr_disestablish(struct intrhand *); int intr_handler(struct intrframe *, struct intrhand *); void cpu_intr_init(struct cpu_info *); Index: arch/amd64/include/pci_machdep.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/pci_machdep.h,v retrieving revision 1.28 diff -u -p -r1.28 pci_machdep.h --- arch/amd64/include/pci_machdep.h 25 Jun 2019 16:46:32 -0000 1.28 +++ arch/amd64/include/pci_machdep.h 10 Jun 2020 08:19:08 -0000 @@ -87,6 +87,8 @@ int pci_intr_map(struct pci_attach_args const char *pci_intr_string(pci_chipset_tag_t, pci_intr_handle_t); void *pci_intr_establish(pci_chipset_tag_t, pci_intr_handle_t, int, int (*)(void *), void *, const char *); +void *pci_intr_establish_cpuid(pci_chipset_tag_t, pci_intr_handle_t, + int, int (*)(void *), void *, const char *, int); void pci_intr_disestablish(pci_chipset_tag_t, void *); #define pci_probe_device_hook(c, a) (0) Index: arch/amd64/isa/isa_machdep.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/isa/isa_machdep.c,v retrieving revision 1.29 diff -u -p -r1.29 isa_machdep.c --- arch/amd64/isa/isa_machdep.c 14 Oct 2017 04:44:43 -0000 1.29 +++ arch/amd64/isa/isa_machdep.c 10 Jun 2020 08:19:08 -0000 @@ -312,7 +312,7 @@ isa_intr_establish(isa_chipset_tag_t ic, KASSERT(pic); - return intr_establish(irq, pic, pin, type, level, ih_fun, + return intr_establish(irq, pic, pin, type, level, NULL, ih_fun, ih_arg, ih_what); } Index: arch/amd64/pci/pci_machdep.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/pci/pci_machdep.c,v retrieving revision 1.74 diff -u -p -r1.74 pci_machdep.c --- arch/amd64/pci/pci_machdep.c 14 May 2020 13:07:11 -0000 1.74 +++ arch/amd64/pci/pci_machdep.c 10 Jun 2020 08:19:08 -0000 @@ -669,18 +669,36 @@ 
void * pci_intr_establish(pci_chipset_tag_t pc, pci_intr_handle_t ih, int level, int (*func)(void *), void *arg, const char *what) { + return pci_intr_establish_cpuid(pc, ih, level, func, arg, what, -1); +} + +void * +pci_intr_establish_cpuid(pci_chipset_tag_t pc, pci_intr_handle_t ih, int level, + int (*func)(void *), void *arg, const char *what, int cpuid) +{ int pin, irq; int bus, dev; pcitag_t tag = ih.tag; struct pic *pic; + struct cpu_info *ci = NULL; + CPU_INFO_ITERATOR cii; + + if (cpuid != -1) { + CPU_INFO_FOREACH(cii, ci) { + if (ci->ci_cpuid == cpuid) + break; + } + if (ci == NULL) + return (NULL); + } if (ih.line & APIC_INT_VIA_MSG) { return intr_establish(-1, &msi_pic, tag, IST_PULSE, level, - func, arg, what); + ci, func, arg, what); } if (ih.line & APIC_INT_VIA_MSGX) { return intr_establish(-1, &msix_pic, tag, IST_PULSE, level, - func, arg, what); + ci, func, arg, what); } pci_decompose_tag(pc, ih.tag, &bus, &dev, NULL); @@ -706,7 +724,8 @@ pci_intr_establish(pci_chipset_tag_t pc, } #endif - return intr_establish(irq, pic, pin, IST_LEVEL, level, func, arg, what); + return intr_establish(irq, pic, pin, IST_LEVEL, level, ci, + func, arg, what); } void Index: conf/GENERIC =================================================================== RCS file: /cvs/src/sys/conf/GENERIC,v retrieving revision 1.269 diff -u -p -r1.269 GENERIC --- conf/GENERIC 9 May 2020 19:48:45 -0000 1.269 +++ conf/GENERIC 10 Jun 2020 08:19:09 -0000 @@ -82,6 +82,7 @@ pseudo-device msts 1 # MSTS line discipl pseudo-device endrun 1 # EndRun line discipline pseudo-device vnd 4 # vnode disk devices pseudo-device ksyms 1 # kernel symbols device +pseudo-device kstat 1 # kernel statistics #pseudo-device dt # Dynamic Tracer # clonable devices Index: conf/files =================================================================== RCS file: /cvs/src/sys/conf/files,v retrieving revision 1.686 diff -u -p -r1.686 files --- conf/files 15 Apr 2020 09:26:49 -0000 1.686 +++ conf/files 10 Jun 2020 
08:19:09 -0000 @@ -20,6 +20,7 @@ define i2cbus {} define gpiobus {} define onewirebus {} define video {} +define intrmap # filesystem firmware loading attribute define firmload @@ -62,6 +63,7 @@ define ether define mpls define sppp define wlan +define stoeplitz # "Chipset" attributes. These are the machine-independent portions # of device drivers. @@ -570,6 +572,9 @@ pseudo-device switch: ifnet, ether pseudo-device ksyms file dev/ksyms.c ksyms needs-flag +pseudo-device kstat +file dev/kstat.c kstat needs-flag + pseudo-device fuse file miscfs/fuse/fuse_device.c fuse needs-flag file miscfs/fuse/fuse_file.c fuse @@ -691,6 +696,7 @@ file kern/kern_resource.c file kern/kern_pledge.c file kern/kern_unveil.c file kern/kern_sched.c +file kern/kern_intrmap.c intrmap file kern/kern_sensors.c file kern/kern_sig.c file kern/kern_smr.c @@ -826,6 +832,7 @@ file net/if_pair.c pair file net/if_pppx.c pppx needs-count file net/if_vxlan.c vxlan needs-count file net/bfd.c bfd +file net/toeplitz.c stoeplitz needs-flag file net80211/ieee80211.c wlan file net80211/ieee80211_amrr.c wlan file net80211/ieee80211_crypto.c wlan Index: dev/kstat.c =================================================================== RCS file: dev/kstat.c diff -N dev/kstat.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ dev/kstat.c 10 Jun 2020 08:19:09 -0000 @@ -0,0 +1,626 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2020 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include + +RBT_HEAD(kstat_id_tree, kstat); + +static inline int +kstat_id_cmp(const struct kstat *a, const struct kstat *b) +{ + if (a->ks_id > b->ks_id) + return (1); + if (a->ks_id < b->ks_id) + return (-1); + + return (0); +} + +RBT_PROTOTYPE(kstat_id_tree, kstat, ks_id_entry, kstat_id_cmp); + +RBT_HEAD(kstat_pv_tree, kstat); + +static inline int +kstat_pv_cmp(const struct kstat *a, const struct kstat *b) +{ + int rv; + + rv = strcmp(a->ks_provider, b->ks_provider); + if (rv != 0) + return (rv); + + if (a->ks_instance > b->ks_instance) + return (1); + if (a->ks_instance < b->ks_instance) + return (-1); + + rv = strcmp(a->ks_name, b->ks_name); + if (rv != 0) + return (rv); + + if (a->ks_unit > b->ks_unit) + return (1); + if (a->ks_unit < b->ks_unit) + return (-1); + + return (0); +} + +RBT_PROTOTYPE(kstat_pv_tree, kstat, ks_pv_entry, kstat_pv_cmp); + +RBT_HEAD(kstat_nm_tree, kstat); + +static inline int +kstat_nm_cmp(const struct kstat *a, const struct kstat *b) +{ + int rv; + + rv = strcmp(a->ks_name, b->ks_name); + if (rv != 0) + return (rv); + + if (a->ks_unit > b->ks_unit) + return (1); + if (a->ks_unit < b->ks_unit) + return (-1); + + rv = strcmp(a->ks_provider, b->ks_provider); + if (rv != 0) + return (rv); + + if (a->ks_instance > b->ks_instance) + return (1); + if (a->ks_instance < b->ks_instance) + return (-1); + + return (0); +} + +RBT_PROTOTYPE(kstat_nm_tree, kstat, ks_nm_entry, kstat_nm_cmp); + +struct kstat_lock_ops { + void (*enter)(void *); + void (*leave)(void *); +}; + +#define kstat_enter(_ks) (_ks)->ks_lock_ops->enter((_ks)->ks_lock) +#define 
kstat_leave(_ks) (_ks)->ks_lock_ops->leave((_ks)->ks_lock) + +static const struct kstat_lock_ops kstat_rlock_ops = { + (void (*)(void *))rw_enter_read, + (void (*)(void *))rw_exit_read, +}; + +static const struct kstat_lock_ops kstat_wlock_ops = { + (void (*)(void *))rw_enter_write, + (void (*)(void *))rw_exit_write, +}; + +static const struct kstat_lock_ops kstat_mutex_ops = { + (void (*)(void *))mtx_enter, + (void (*)(void *))mtx_leave, +}; + +static struct rwlock kstat_lock = RWLOCK_INITIALIZER("kstat"); + +/* + * The global state is versioned so changes to the set of kstats + * can be detected. This is an int so it can be read atomically on + * any arch, which is ridiculous optimisation, really. + */ +static unsigned int kstat_version = 0; + +/* + * kstat structures have a unique identifier so they can be found + * quickly. Identifiers are 64bit in the hope that it won't wrap + * during the runtime of a system. The identifiers start at 1 so that + * 0 can be used as the first value for userland to iterate with. 
+ */ +static uint64_t kstat_next_id = 1; + +static struct kstat_id_tree kstat_id_tree = RBT_INITIALIZER(); +static struct kstat_pv_tree kstat_pv_tree = RBT_INITIALIZER(); +static struct kstat_nm_tree kstat_nm_tree = RBT_INITIALIZER(); +static struct pool kstat_pool; + +static struct rwlock kstat_default_lock = + RWLOCK_INITIALIZER("kstatlk"); + +static int kstat_read(struct kstat *); +static int kstat_copy(struct kstat *, void *); + +int +kstatattach(int num) +{ + /* XXX install system stats here */ + return (0); +} + +int +kstatopen(dev_t dev, int flag, int mode, struct proc *p) +{ + return (0); +} + +int +kstatclose(dev_t dev, int flag, int mode, struct proc *p) +{ + return (0); +} + +static int +kstatioc_enter(struct kstat_req *ksreq) +{ + int error; + + error = rw_enter(&kstat_lock, RW_READ | RW_INTR); + if (error != 0) + return (error); + + if (!ISSET(ksreq->ks_rflags, KSTATIOC_F_IGNVER) && + ksreq->ks_version != kstat_version) { + error = EBUSY; + goto error; + } + + return (0); + +error: + rw_exit(&kstat_lock); + return (error); +} + +#define sstrlcpy(_dst, _src) \ + (strlcpy((_dst), (_src), sizeof((_dst))) >= sizeof((_dst))) + +static int +kstatioc_leave(struct kstat_req *ksreq, struct kstat *ks) +{ + void *buf = NULL; + size_t klen = 0, ulen = 0; + struct timespec updated; + int error = 0; + + if (ks == NULL) { + error = ENOENT; + goto error; + } + + switch (ks->ks_state) { + case KSTAT_S_CREATED: + ksreq->ks_updated = ks->ks_created; + ksreq->ks_interval.tv_sec = 0; + ksreq->ks_interval.tv_nsec = 0; + ksreq->ks_datalen = 0; + ksreq->ks_dataver = 0; + break; + + case KSTAT_S_INSTALLED: + ksreq->ks_dataver = ks->ks_dataver; + ksreq->ks_interval = ks->ks_interval; + + if (ksreq->ks_data == NULL) { + /* userland doesn't want actual data, so shortcut */ + kstat_enter(ks); + ksreq->ks_datalen = ks->ks_datalen; + ksreq->ks_updated = ks->ks_updated; + kstat_leave(ks); + break; + } + + klen = ks->ks_datalen; /* KSTAT_F_REALLOC */ + buf = malloc(klen, M_TEMP, 
M_WAITOK|M_CANFAIL); + if (buf == NULL) { + error = ENOMEM; + goto error; + } + + kstat_enter(ks); + error = (*ks->ks_read)(ks); + if (error == 0) { + updated = ks->ks_updated; + + /* KSTAT_F_REALLOC */ + KASSERTMSG(ks->ks_datalen == klen, + "kstat doesnt support resized data yet"); + + error = (*ks->ks_copy)(ks, buf); + } + kstat_leave(ks); + + if (error != 0) + goto error; + + ulen = ksreq->ks_datalen; + ksreq->ks_datalen = klen; /* KSTAT_F_REALLOC */ + ksreq->ks_updated = updated; + break; + default: + panic("ks %p unexpected state %u", ks, ks->ks_state); + } + + ksreq->ks_version = kstat_version; + ksreq->ks_id = ks->ks_id; + + if (sstrlcpy(ksreq->ks_provider, ks->ks_provider) != 0) + panic("kstat provider string has grown"); + ksreq->ks_instance = ks->ks_instance; + if (sstrlcpy(ksreq->ks_name, ks->ks_name) != 0) + panic("kstat name string has grown"); + ksreq->ks_unit = ks->ks_unit; + + ksreq->ks_created = ks->ks_created; + ksreq->ks_type = ks->ks_type; + ksreq->ks_state = ks->ks_state; + +error: + rw_exit(&kstat_lock); + + if (buf != NULL) { + if (error == 0) + error = copyout(buf, ksreq->ks_data, min(klen, ulen)); + + free(buf, M_TEMP, klen); + } + + return (error); +} + +static int +kstatioc_find_id(struct kstat_req *ksreq) +{ + struct kstat *ks, key; + int error; + + error = kstatioc_enter(ksreq); + if (error != 0) + return (error); + + key.ks_id = ksreq->ks_id; + + ks = RBT_FIND(kstat_id_tree, &kstat_id_tree, &key); + + return (kstatioc_leave(ksreq, ks)); +} + +static int +kstatioc_nfind_id(struct kstat_req *ksreq) +{ + struct kstat *ks, key; + int error; + + error = kstatioc_enter(ksreq); + if (error != 0) + return (error); + + key.ks_id = ksreq->ks_id; + + ks = RBT_NFIND(kstat_id_tree, &kstat_id_tree, &key); + + return (kstatioc_leave(ksreq, ks)); +} + +static int +kstatioc_find_pv(struct kstat_req *ksreq) +{ + struct kstat *ks, key; + int error; + + error = kstatioc_enter(ksreq); + if (error != 0) + return (error); + + key.ks_provider = 
ksreq->ks_provider; + key.ks_instance = ksreq->ks_instance; + key.ks_name = ksreq->ks_name; + key.ks_unit = ksreq->ks_unit; + + ks = RBT_FIND(kstat_pv_tree, &kstat_pv_tree, &key); + + return (kstatioc_leave(ksreq, ks)); +} + +static int +kstatioc_nfind_pv(struct kstat_req *ksreq) +{ + struct kstat *ks, key; + int error; + + error = kstatioc_enter(ksreq); + if (error != 0) + return (error); + + key.ks_provider = ksreq->ks_provider; + key.ks_instance = ksreq->ks_instance; + key.ks_name = ksreq->ks_name; + key.ks_unit = ksreq->ks_unit; + + ks = RBT_NFIND(kstat_pv_tree, &kstat_pv_tree, &key); + + return (kstatioc_leave(ksreq, ks)); +} + +static int +kstatioc_find_nm(struct kstat_req *ksreq) +{ + struct kstat *ks, key; + int error; + + error = kstatioc_enter(ksreq); + if (error != 0) + return (error); + + key.ks_name = ksreq->ks_name; + key.ks_unit = ksreq->ks_unit; + key.ks_provider = ksreq->ks_provider; + key.ks_instance = ksreq->ks_instance; + + ks = RBT_FIND(kstat_nm_tree, &kstat_nm_tree, &key); + + return (kstatioc_leave(ksreq, ks)); +} + +static int +kstatioc_nfind_nm(struct kstat_req *ksreq) +{ + struct kstat *ks, key; + int error; + + error = kstatioc_enter(ksreq); + if (error != 0) + return (error); + + key.ks_name = ksreq->ks_name; + key.ks_unit = ksreq->ks_unit; + key.ks_provider = ksreq->ks_provider; + key.ks_instance = ksreq->ks_instance; + + ks = RBT_NFIND(kstat_nm_tree, &kstat_nm_tree, &key); + + return (kstatioc_leave(ksreq, ks)); +} + +int +kstatioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) +{ + struct kstat_req *ksreq = (struct kstat_req *)data; + int error = 0; + + KERNEL_UNLOCK(); + + switch (cmd) { + case KSTATIOC_VERSION: + *(unsigned int *)data = kstat_version; + break; + + case KSTATIOC_FIND_ID: + error = kstatioc_find_id(ksreq); + break; + case KSTATIOC_NFIND_ID: + error = kstatioc_nfind_id(ksreq); + break; + case KSTATIOC_FIND_PROVIDER: + error = kstatioc_find_pv(ksreq); + break; + case KSTATIOC_NFIND_PROVIDER: + error = 
kstatioc_nfind_pv(ksreq); + break; + case KSTATIOC_FIND_NAME: + error = kstatioc_find_nm(ksreq); + break; + case KSTATIOC_NFIND_NAME: + error = kstatioc_nfind_nm(ksreq); + break; + + default: + error = ENOTTY; + break; + } + + KERNEL_LOCK(); + + return (error); +} + +static void +kstat_init(void) +{ + static int initialized = 0; + + if (initialized) + return; + + pool_init(&kstat_pool, sizeof(struct kstat), 0, IPL_NONE, + PR_WAITOK | PR_RWLOCK, "kstatmem", NULL); + + initialized = 1; +} + +struct kstat * +kstat_create(const char *provider, unsigned int instance, + const char *name, unsigned int unit, + unsigned int type, unsigned int flags) +{ + struct kstat *ks, *oks; + + KASSERT(strlen(provider) <= KSTAT_STRLEN); + KASSERT(strlen(name) <= KSTAT_STRLEN); + + kstat_init(); + + ks = pool_get(&kstat_pool, PR_WAITOK|PR_ZERO); + + ks->ks_provider = provider; + ks->ks_instance = instance; + ks->ks_name = name; + ks->ks_unit = unit; + ks->ks_flags = flags; + ks->ks_type = type; + ks->ks_state = KSTAT_S_CREATED; + + getnanouptime(&ks->ks_created); + ks->ks_updated = ks->ks_created; + + ks->ks_lock = &kstat_default_lock; + ks->ks_lock_ops = &kstat_wlock_ops; + ks->ks_read = kstat_read; + ks->ks_copy = kstat_copy; + + rw_enter_write(&kstat_lock); + ks->ks_id = kstat_next_id; + + oks = RBT_INSERT(kstat_pv_tree, &kstat_pv_tree, ks); + if (oks == NULL) { + /* commit */ + kstat_next_id++; + kstat_version++; + + oks = RBT_INSERT(kstat_nm_tree, &kstat_nm_tree, ks); + if (oks != NULL) + panic("kstat name collision! (%llu)", ks->ks_id); + + oks = RBT_INSERT(kstat_id_tree, &kstat_id_tree, ks); + if (oks != NULL) + panic("kstat id collision! 
(%llu)", ks->ks_id); + } + rw_exit_write(&kstat_lock); + + if (oks != NULL) { + pool_put(&kstat_pool, ks); + return (NULL); + } + + return (ks); +} + +void +kstat_set_rlock(struct kstat *ks, struct rwlock *rwl) +{ + KASSERT(ks->ks_state == KSTAT_S_CREATED); + + ks->ks_lock = rwl; + ks->ks_lock_ops = &kstat_rlock_ops; +} + +void +kstat_set_wlock(struct kstat *ks, struct rwlock *rwl) +{ + KASSERT(ks->ks_state == KSTAT_S_CREATED); + + ks->ks_lock = rwl; + ks->ks_lock_ops = &kstat_wlock_ops; +} + +void +kstat_set_mutex(struct kstat *ks, struct mutex *mtx) +{ + KASSERT(ks->ks_state == KSTAT_S_CREATED); + + ks->ks_lock = mtx; + ks->ks_lock_ops = &kstat_mutex_ops; +} + +int +kstat_read_nop(struct kstat *ks) +{ + return (0); +} + +void +kstat_install(struct kstat *ks) +{ + if (!ISSET(ks->ks_flags, KSTAT_F_REALLOC)) { + KASSERTMSG(ks->ks_copy != NULL || ks->ks_data != NULL, + "kstat %s:%u:%s:%u must provide ks_copy or ks_data", + ks->ks_provider, ks->ks_instance, ks->ks_name, ks->ks_unit); + KASSERT(ks->ks_datalen > 0); + } + + rw_enter_write(&kstat_lock); + ks->ks_state = KSTAT_S_INSTALLED; + rw_exit_write(&kstat_lock); +} + +void +kstat_destroy(struct kstat *ks) +{ + rw_enter_write(&kstat_lock); + RBT_REMOVE(kstat_id_tree, &kstat_id_tree, ks); + RBT_REMOVE(kstat_pv_tree, &kstat_pv_tree, ks); + RBT_REMOVE(kstat_nm_tree, &kstat_nm_tree, ks); + kstat_version++; + rw_exit_write(&kstat_lock); + + pool_put(&kstat_pool, ks); +} + +static int +kstat_read(struct kstat *ks) +{ + getnanouptime(&ks->ks_updated); + return (0); +} + +static int +kstat_copy(struct kstat *ks, void *buf) +{ + memcpy(buf, ks->ks_data, ks->ks_datalen); + return (0); +} + +RBT_GENERATE(kstat_id_tree, kstat, ks_id_entry, kstat_id_cmp); +RBT_GENERATE(kstat_pv_tree, kstat, ks_pv_entry, kstat_pv_cmp); +RBT_GENERATE(kstat_nm_tree, kstat, ks_nm_entry, kstat_nm_cmp); + +void +kstat_kv_init(struct kstat_kv *kv, const char *name, enum kstat_kv_type type) +{ + memset(kv, 0, sizeof(*kv)); + strlcpy(kv->kv_key, name, 
sizeof(kv->kv_key)); /* XXX truncated? */ + kv->kv_type = type; + kv->kv_unit = KSTAT_KV_U_NONE; +} + +void +kstat_kv_unit_init(struct kstat_kv *kv, const char *name, + enum kstat_kv_type type, enum kstat_kv_unit unit) +{ + switch (type) { + case KSTAT_KV_T_COUNTER64: + case KSTAT_KV_T_COUNTER32: + case KSTAT_KV_T_UINT64: + case KSTAT_KV_T_INT64: + case KSTAT_KV_T_UINT32: + case KSTAT_KV_T_INT32: + break; + default: + panic("kv unit init %s: unit for non-integer type", name); + } + + memset(kv, 0, sizeof(*kv)); + strlcpy(kv->kv_key, name, sizeof(kv->kv_key)); /* XXX truncated? */ + kv->kv_type = type; + kv->kv_unit = unit; +} Index: dev/pci/files.pci =================================================================== RCS file: /cvs/src/sys/dev/pci/files.pci,v retrieving revision 1.345 diff -u -p -r1.345 files.pci --- dev/pci/files.pci 6 Mar 2020 08:39:34 -0000 1.345 +++ dev/pci/files.pci 10 Jun 2020 08:19:09 -0000 @@ -718,7 +718,7 @@ attach vic at pci file dev/pci/if_vic.c vic # VMware VMXNET3 virtual interface -device vmx: ether, ifnet, ifmedia +device vmx: ether, ifnet, ifmedia, stoeplitz, intrmap attach vmx at pci file dev/pci/if_vmx.c vmx Index: dev/pci/if_vmxreg.h =================================================================== RCS file: /cvs/src/sys/dev/pci/if_vmxreg.h,v retrieving revision 1.6 diff -u -p -r1.6 if_vmxreg.h --- dev/pci/if_vmxreg.h 27 Oct 2019 22:24:40 -0000 1.6 +++ dev/pci/if_vmxreg.h 10 Jun 2020 08:19:09 -0000 @@ -73,6 +73,7 @@ struct UPT1_RxStats { #define VMXNET3_CMD_SET_RXMODE 0xcafe0003 /* set interface flags */ #define VMXNET3_CMD_SET_FILTER 0xcafe0004 /* set address filter */ #define VMXNET3_CMD_GET_STATUS 0xf00d0000 /* get queue errors */ +#define VMXNET3_CMD_GET_STATS 0xf00d0001 #define VMXNET3_CMD_GET_LINK 0xf00d0002 /* get link status */ #define VMXNET3_CMD_GET_MACL 0xf00d0003 #define VMXNET3_CMD_GET_MACH 0xf00d0004 @@ -177,6 +178,7 @@ struct vmxnet3_rxcompdesc { #define VMXNET3_RXC_QID_S 16 #define VMXNET3_RXC_RSSTYPE_M 
0x0000000f #define VMXNET3_RXC_RSSTYPE_S 26 +#define VMXNET3_RXC_RSSTYPE_NONE 0 #define VMXNET3_RXC_NOCSUM 0x40000000 /* no checksum calculated */ #define VMXNET3_RXC_RES1 0x80000000 @@ -223,7 +225,6 @@ struct vmxnet3_rxcompdesc { #define VMXNET3_MAX_TX_QUEUES 8 #define VMXNET3_MAX_RX_QUEUES 16 #define VMXNET3_MAX_INTRS (VMXNET3_MAX_TX_QUEUES + VMXNET3_MAX_RX_QUEUES + 1) -#define VMXNET3_NINTR 1 #define VMXNET3_ICTRL_DISABLE_ALL 0x01 @@ -334,4 +335,22 @@ struct vmxnet3_rxq_shared { struct UPT1_RxStats stats; u_int8_t pad4[88]; +} __packed; + +#define UPT1_RSS_MAX_KEY_SIZE 40 +#define UPT1_RSS_MAX_IND_TABLE_SIZE 128 + +struct vmxnet3_upt1_rss_conf { + u_int16_t hash_type; +#define UPT1_RSS_HASH_TYPE_NONE 0 +#define UPT1_RSS_HASH_TYPE_IPV4 1 +#define UPT1_RSS_HASH_TYPE_TCP_IPV4 2 +#define UPT1_RSS_HASH_TYPE_IPV6 4 +#define UPT1_RSS_HASH_TYPE_TCP_IPV6 8 + u_int16_t hash_func; +#define UPT1_RSS_HASH_FUNC_TOEPLITZ 1 + u_int16_t hash_key_size; + u_int16_t ind_table_size; + u_int8_t hash_key[UPT1_RSS_MAX_KEY_SIZE]; + u_int8_t ind_table[UPT1_RSS_MAX_IND_TABLE_SIZE]; } __packed; Index: dev/pci/pci.c =================================================================== RCS file: /cvs/src/sys/dev/pci/pci.c,v retrieving revision 1.115 diff -u -p -r1.115 pci.c --- dev/pci/pci.c 15 Jan 2020 14:01:19 -0000 1.115 +++ dev/pci/pci.c 10 Jun 2020 08:19:09 -0000 @@ -1625,7 +1625,18 @@ pci_resume_msix(pci_chipset_tag_t pc, pc pci_conf_write(pc, tag, off, mc); } -#else +int +pci_intr_msix_count(pci_chipset_tag_t pc, pcitag_t tag) +{ + pcireg_t reg; + + if (pci_get_capability(pc, tag, PCI_CAP_MSIX, NULL, ®) == 0) + return (0); + + return (PCI_MSIX_MC_TBLSZ(reg)); +} + +#else /* __HAVE_PCI_MSIX */ struct msix_vector * pci_alloc_msix_table(pci_chipset_tag_t pc, pcitag_t tag) @@ -1649,6 +1660,12 @@ void pci_resume_msix(pci_chipset_tag_t pc, pcitag_t tag, bus_space_tag_t memt, pcireg_t mc, struct msix_vector *table) { +} + +int +pci_intr_msix_count(pci_chipset_tag_t pc, pcitag_t tag) +{ + 
return (0); } #endif /* __HAVE_PCI_MSIX */ Index: dev/pci/pcivar.h =================================================================== RCS file: /cvs/src/sys/dev/pci/pcivar.h,v retrieving revision 1.72 diff -u -p -r1.72 pcivar.h --- dev/pci/pcivar.h 25 Jun 2019 16:46:33 -0000 1.72 +++ dev/pci/pcivar.h 10 Jun 2020 08:19:09 -0000 @@ -247,6 +247,8 @@ void pci_suspend_msix(pci_chipset_tag_t, void pci_resume_msix(pci_chipset_tag_t, pcitag_t, bus_space_tag_t, pcireg_t, struct msix_vector *); +int pci_intr_msix_count(pci_chipset_tag_t, pcitag_t); + uint16_t pci_requester_id(pci_chipset_tag_t, pcitag_t); struct pci_matchid { Index: kern/init_main.c =================================================================== RCS file: /cvs/src/sys/kern/init_main.c,v retrieving revision 1.299 diff -u -p -r1.299 init_main.c --- kern/init_main.c 29 May 2020 04:42:25 -0000 1.299 +++ kern/init_main.c 10 Jun 2020 08:19:10 -0000 @@ -85,6 +85,7 @@ #include + #include #include #include @@ -102,6 +103,11 @@ extern void kubsan_init(void); extern void nfs_init(void); #endif +#include "stoeplitz.h" +#if NSTOEPLITZ > 0 +extern void stoeplitz_init(void); +#endif + #include "mpath.h" #include "vscsi.h" #include "softraid.h" @@ -239,6 +245,10 @@ main(void *framep) * allocate mbufs or mbuf clusters during autoconfiguration. */ mbinit(); + +#if NSTOEPLITZ > 0 + stoeplitz_init(); +#endif /* Initialize sockets. 
*/ soinit(); Index: net/ifq.c =================================================================== RCS file: /cvs/src/sys/net/ifq.c,v retrieving revision 1.39 diff -u -p -r1.39 ifq.c --- net/ifq.c 21 May 2020 00:06:16 -0000 1.39 +++ net/ifq.c 10 Jun 2020 08:19:11 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: ifq.c,v 1.39 2020/05/21 00:06:16 dlg Exp $ */ +/* $OpenBSD: ifq.c,v 1.38 2020/05/20 01:28:59 dlg Exp $ */ /* * Copyright (c) 2015 David Gwynne @@ -17,6 +17,7 @@ */ #include "bpfilter.h" +#include "kstat.h" #include #include @@ -32,6 +33,10 @@ #include #endif +#if NKSTAT > 0 +#include +#endif + /* * priq glue */ @@ -122,7 +127,10 @@ ifq_is_serialized(struct ifqueue *ifq) void ifq_start(struct ifqueue *ifq) { - if (ifq_len(ifq) >= min(ifq->ifq_if->if_txmit, ifq->ifq_maxlen)) { + struct ifnet *ifp = ifq->ifq_if; + + if (ISSET(ifp->if_xflags, IFXF_MPSAFE) && + ifq_len(ifq) >= min(ifp->if_txmit, ifq->ifq_maxlen)) { task_del(ifq->ifq_softnet, &ifq->ifq_bundle); ifq_run_start(ifq); } else @@ -188,11 +196,42 @@ ifq_barrier_task(void *p) * ifqueue mbuf queue API */ +#if NKSTAT > 0 +struct ifq_kstat_data { + struct kstat_kv kd_packets; + struct kstat_kv kd_bytes; + struct kstat_kv kd_qdrops; + struct kstat_kv kd_errors; + struct kstat_kv kd_qlen; + struct kstat_kv kd_maxqlen; + struct kstat_kv kd_oactive; +}; + +static const struct ifq_kstat_data ifq_kstat_tpl = { + KSTAT_KV_UNIT_INITIALIZER("packets", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("bytes", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES), + KSTAT_KV_UNIT_INITIALIZER("qdrops", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("errors", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("qlen", + KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("maxqlen", + KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS), + KSTAT_KV_INITIALIZER("oactive", KSTAT_KV_T_BOOL), +}; + +static int ifq_kstat_copy(struct kstat *, void *); +#endif + void ifq_init(struct 
ifqueue *ifq, struct ifnet *ifp, unsigned int idx) { ifq->ifq_if = ifp; - ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */ + ifq->ifq_softnet = ISSET(ifp->if_xflags, IFXF_MPSAFE) ? + net_tq(ifp->if_index /* + idx */) : systq; ifq->ifq_softc = NULL; mtx_init(&ifq->ifq_mtx, IPL_NET); @@ -222,6 +261,18 @@ ifq_init(struct ifqueue *ifq, struct ifn ifq_set_maxlen(ifq, IFQ_MAXLEN); ifq->ifq_idx = idx; + +#if NKSTAT > 0 + /* XXX xname vs driver name and unit */ + ifq->ifq_kstat = kstat_create(ifp->if_xname, 0, + "txq", ifq->ifq_idx, KSTAT_T_KV, 0); + KASSERT(ifq->ifq_kstat != NULL); + kstat_set_mutex(ifq->ifq_kstat, &ifq->ifq_mtx); + ifq->ifq_kstat->ks_softc = ifq; + ifq->ifq_kstat->ks_datalen = sizeof(ifq_kstat_tpl); + ifq->ifq_kstat->ks_copy = ifq_kstat_copy; + kstat_install(ifq->ifq_kstat); +#endif } void @@ -265,6 +316,10 @@ ifq_destroy(struct ifqueue *ifq) { struct mbuf_list ml = MBUF_LIST_INITIALIZER(); +#if NKSTAT > 0 + kstat_destroy(ifq->ifq_kstat); +#endif + NET_ASSERT_UNLOCKED(); if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle)) taskq_barrier(ifq->ifq_softnet); @@ -289,6 +344,26 @@ ifq_add_data(struct ifqueue *ifq, struct mtx_leave(&ifq->ifq_mtx); } +#if NKSTAT > 0 +static int +ifq_kstat_copy(struct kstat *ks, void *dst) +{ + struct ifqueue *ifq = ks->ks_softc; + struct ifq_kstat_data *kd = dst; + + *kd = ifq_kstat_tpl; + kd->kd_packets.kv_v.v_u64 = ifq->ifq_packets; + kd->kd_bytes.kv_v.v_u64 = ifq->ifq_bytes; + kd->kd_qdrops.kv_v.v_u64 = ifq->ifq_qdrops; + kd->kd_errors.kv_v.v_u64 = ifq->ifq_errors; + kd->kd_qlen.kv_v.v_u32 = ifq->ifq_len; + kd->kd_maxqlen.kv_v.v_u32 = ifq->ifq_maxlen; + kd->kd_oactive.kv_v.v_bool = ifq->ifq_oactive; + + return (0); +} +#endif + int ifq_enqueue(struct ifqueue *ifq, struct mbuf *m) { @@ -505,6 +580,31 @@ ifq_mfreeml(struct ifqueue *ifq, struct * ifiq */ +#if NKSTAT > 0 +struct ifiq_kstat_data { + struct kstat_kv kd_packets; + struct kstat_kv kd_bytes; + struct kstat_kv kd_qdrops; + struct kstat_kv kd_errors; + struct 
kstat_kv kd_qlen; +}; + +static const struct ifiq_kstat_data ifiq_kstat_tpl = { + KSTAT_KV_UNIT_INITIALIZER("packets", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("bytes", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES), + KSTAT_KV_UNIT_INITIALIZER("qdrops", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("errors", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("qlen", + KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS), +}; + +static int ifiq_kstat_copy(struct kstat *, void *); +#endif + static void ifiq_process(void *); void @@ -525,11 +625,27 @@ ifiq_init(struct ifiqueue *ifiq, struct ifiq->ifiq_errors = 0; ifiq->ifiq_idx = idx; + +#if NKSTAT > 0 + /* XXX xname vs driver name and unit */ + ifiq->ifiq_kstat = kstat_create(ifp->if_xname, 0, + "rxq", ifiq->ifiq_idx, KSTAT_T_KV, 0); + KASSERT(ifiq->ifiq_kstat != NULL); + kstat_set_mutex(ifiq->ifiq_kstat, &ifiq->ifiq_mtx); + ifiq->ifiq_kstat->ks_softc = ifiq; + ifiq->ifiq_kstat->ks_datalen = sizeof(ifiq_kstat_tpl); + ifiq->ifiq_kstat->ks_copy = ifiq_kstat_copy; + kstat_install(ifiq->ifiq_kstat); +#endif } void ifiq_destroy(struct ifiqueue *ifiq) { +#if NKSTAT > 0 + kstat_destroy(ifiq->ifiq_kstat); +#endif + NET_ASSERT_UNLOCKED(); if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task)) taskq_barrier(ifiq->ifiq_softnet); @@ -616,6 +732,24 @@ ifiq_add_data(struct ifiqueue *ifiq, str data->ifi_iqdrops += ifiq->ifiq_qdrops; mtx_leave(&ifiq->ifiq_mtx); } + +#if NKSTAT > 0 +static int +ifiq_kstat_copy(struct kstat *ks, void *dst) +{ + struct ifiqueue *ifiq = ks->ks_softc; + struct ifiq_kstat_data *kd = dst; + + *kd = ifiq_kstat_tpl; + kd->kd_packets.kv_v.v_u64 = ifiq->ifiq_packets; + kd->kd_bytes.kv_v.v_u64 = ifiq->ifiq_bytes; + kd->kd_qdrops.kv_v.v_u64 = ifiq->ifiq_qdrops; + kd->kd_errors.kv_v.v_u64 = ifiq->ifiq_errors; + kd->kd_qlen.kv_v.v_u32 = ml_len(&ifiq->ifiq_ml); + + return (0); +} +#endif int ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m) Index: 
net/ifq.h =================================================================== RCS file: /cvs/src/sys/net/ifq.h,v retrieving revision 1.31 diff -u -p -r1.31 ifq.h --- net/ifq.h 22 May 2020 07:02:24 -0000 1.31 +++ net/ifq.h 10 Jun 2020 08:19:11 -0000 @@ -20,6 +20,7 @@ #define _NET_IFQ_H_ struct ifnet; +struct kstat; struct ifq_ops; @@ -54,6 +55,8 @@ struct ifqueue { uint64_t ifq_errors; uint64_t ifq_mcasts; + struct kstat *ifq_kstat; + /* work serialisation */ struct mutex ifq_task_mtx; struct task_list ifq_task_list; @@ -91,6 +94,8 @@ struct ifiqueue { uint64_t ifiq_errors; uint64_t ifiq_mcasts; uint64_t ifiq_noproto; + + struct kstat *ifiq_kstat; /* properties */ unsigned int ifiq_idx; Index: net/toeplitz.c =================================================================== RCS file: net/toeplitz.c diff -N net/toeplitz.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ net/toeplitz.c 10 Jun 2020 08:19:11 -0000 @@ -0,0 +1,235 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2009 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Sepherosa Ziehau + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright (c) 2019 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include +#include +#include +#include + +#include + +#ifdef _KERNEL +#include +#endif + +/* + * symmetric toeplitz + */ + +static stoeplitz_key stoeplitz_keyseed = STOEPLITZ_KEYSEED; +static struct stoeplitz_cache stoeplitz_syskey_cache; +const struct stoeplitz_cache *const + stoeplitz_cache = &stoeplitz_syskey_cache; + +void +stoeplitz_init(void) +{ +printf("%s\n", __func__); + stoeplitz_cache_init(&stoeplitz_syskey_cache, stoeplitz_keyseed); +} + +#define NBSK (NBBY * sizeof(stoeplitz_key)) + +void +stoeplitz_cache_init(struct stoeplitz_cache *scache, stoeplitz_key skey) +{ + uint16_t key[NBBY]; + unsigned int j, b, shift, val; + +printf("%s %04x\n", __func__, skey); + bzero(key, sizeof(key)); + + /* + * Calculate 32bit keys for one byte; one key for each bit. + */ + for (b = 0; b < NBBY; ++b) { + for (j = 0; j < NBSK; ++j) { + unsigned int bit; + + bit = b + j; + + shift = NBSK - (bit % NBSK) - 1; + if (skey & (1 << shift)) + key[b] |= 1 << (NBSK - j); + } + } + + /* + * Cache the results of all possible bit combination of + * one byte. 
+ */ + for (val = 0; val < 256; ++val) { + uint16_t res = 0; + + for (b = 0; b < NBBY; ++b) { + shift = NBBY - b - 1; + if (val & (1 << shift)) + res ^= key[b]; + } + scache->bytes[val] = res; + } +} + +uint16_t +stoeplitz_hash_ip4(const struct stoeplitz_cache *scache, + in_addr_t faddr, in_addr_t laddr) +{ + uint16_t lo, hi; + + lo = stoeplitz_cache_entry(scache, faddr >> 0); + lo ^= stoeplitz_cache_entry(scache, faddr >> 16); + lo ^= stoeplitz_cache_entry(scache, laddr >> 0); + lo ^= stoeplitz_cache_entry(scache, laddr >> 16); + + hi = stoeplitz_cache_entry(scache, faddr >> 8); + hi ^= stoeplitz_cache_entry(scache, faddr >> 24); + hi ^= stoeplitz_cache_entry(scache, laddr >> 8); + hi ^= stoeplitz_cache_entry(scache, laddr >> 24); + + return (swap16(lo) ^ hi); +} + +uint16_t +stoeplitz_hash_ip4port(const struct stoeplitz_cache *scache, + in_addr_t faddr, in_addr_t laddr, in_port_t fport, in_port_t lport) +{ + uint16_t hi, lo; + + lo = stoeplitz_cache_entry(scache, faddr >> 0); + lo ^= stoeplitz_cache_entry(scache, faddr >> 16); + lo ^= stoeplitz_cache_entry(scache, laddr >> 0); + lo ^= stoeplitz_cache_entry(scache, laddr >> 16); + lo ^= stoeplitz_cache_entry(scache, fport >> 0); + lo ^= stoeplitz_cache_entry(scache, lport >> 0); + + hi = stoeplitz_cache_entry(scache, faddr >> 8); + hi ^= stoeplitz_cache_entry(scache, faddr >> 24); + hi ^= stoeplitz_cache_entry(scache, laddr >> 8); + hi ^= stoeplitz_cache_entry(scache, laddr >> 24); + hi ^= stoeplitz_cache_entry(scache, fport >> 8); + hi ^= stoeplitz_cache_entry(scache, lport >> 8); + + return (swap16(lo) ^ hi); +} + +#ifdef INET6 +uint16_t +stoeplitz_hash_ip6(const struct stoeplitz_cache *scache, + const struct in6_addr *faddr6, const struct in6_addr *laddr6) +{ + uint16_t hi = 0, lo = 0; + size_t i; + + for (i = 0; i < nitems(faddr6->s6_addr32); i++) { + uint32_t faddr = faddr6->s6_addr32[i]; + uint32_t laddr = laddr6->s6_addr32[i]; + + lo ^= stoeplitz_cache_entry(scache, faddr >> 0); + lo ^=
stoeplitz_cache_entry(scache, faddr >> 16); + lo ^= stoeplitz_cache_entry(scache, laddr >> 0); + lo ^= stoeplitz_cache_entry(scache, laddr >> 16); + + hi ^= stoeplitz_cache_entry(scache, faddr >> 8); + hi ^= stoeplitz_cache_entry(scache, faddr >> 24); + hi ^= stoeplitz_cache_entry(scache, laddr >> 8); + hi ^= stoeplitz_cache_entry(scache, laddr >> 24); + } + + return (swap16(lo) ^ hi); +} + +uint16_t +stoeplitz_hash_ip6port(const struct stoeplitz_cache *scache, + const struct in6_addr *faddr6, const struct in6_addr * laddr6, + in_port_t fport, in_port_t lport) +{ + uint16_t hi = 0, lo = 0; + size_t i; + + for (i = 0; i < nitems(faddr6->s6_addr32); i++) { + uint32_t faddr = faddr6->s6_addr32[i]; + uint32_t laddr = laddr6->s6_addr32[i]; + + lo ^= stoeplitz_cache_entry(scache, faddr >> 0); + lo ^= stoeplitz_cache_entry(scache, faddr >> 16); + lo ^= stoeplitz_cache_entry(scache, laddr >> 0); + lo ^= stoeplitz_cache_entry(scache, laddr >> 16); + + hi ^= stoeplitz_cache_entry(scache, faddr >> 8); + hi ^= stoeplitz_cache_entry(scache, faddr >> 24); + hi ^= stoeplitz_cache_entry(scache, laddr >> 8); + hi ^= stoeplitz_cache_entry(scache, laddr >> 24); + } + + lo ^= stoeplitz_cache_entry(scache, fport >> 0); + lo ^= stoeplitz_cache_entry(scache, lport >> 0); + + hi ^= stoeplitz_cache_entry(scache, fport >> 8); + hi ^= stoeplitz_cache_entry(scache, lport >> 8); + + return (swap16(lo) ^ hi); +} +#endif /* INET6 */ + +void +stoeplitz_to_key(uint8_t *k, size_t klen) +{ + uint16_t skey = htons(stoeplitz_keyseed); + size_t i; + + KASSERT((klen % 2) == 0); + + for (i = 0; i < klen; i += sizeof(skey)) { + k[i + 0] = skey >> 8; + k[i + 1] = skey; + } +} Index: net/toeplitz.h =================================================================== RCS file: net/toeplitz.h diff -N net/toeplitz.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ net/toeplitz.h 10 Jun 2020 08:19:11 -0000 @@ -0,0 +1,113 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2019 David Gwynne + * + * Permission to use, copy,
modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _SYS_NET_TOEPLITZ_H_ +#define _SYS_NET_TOEPLITZ_H_ + +#include + +/* + * symmetric toeplitz + */ + +typedef uint16_t stoeplitz_key; + +struct stoeplitz_cache { + uint16_t bytes[256]; +}; + +static __unused inline uint16_t +stoeplitz_cache_entry(const struct stoeplitz_cache *scache, uint8_t byte) +{ + return (scache->bytes[byte]); +} + +void stoeplitz_cache_init(struct stoeplitz_cache *, stoeplitz_key); + +uint16_t stoeplitz_hash_ip4(const struct stoeplitz_cache *, + uint32_t, uint32_t); +uint16_t stoeplitz_hash_ip4port(const struct stoeplitz_cache *, + uint32_t, uint32_t, uint16_t, uint16_t); + +#ifdef INET6 +struct in6_addr; +uint16_t stoeplitz_hash_ip6(const struct stoeplitz_cache *, + const struct in6_addr *, const struct in6_addr *); +uint16_t stoeplitz_hash_ip6port(const struct stoeplitz_cache *, + const struct in6_addr *, const struct in6_addr *, + uint16_t, uint16_t); +#endif + +/* hash a uint16_t in network byte order */ +static __unused inline uint16_t +stoeplitz_hash_n16(const struct stoeplitz_cache *scache, uint16_t n16) +{ + uint16_t hi, lo; + + hi = stoeplitz_cache_entry(scache, n16 >> 8); + lo = stoeplitz_cache_entry(scache, n16); + + return (hi ^ swap16(lo)); +} + +/* hash a uint16_t in host byte order */ +static __unused inline uint16_t
+stoeplitz_hash_h16(const struct stoeplitz_cache *scache, uint16_t h16) +{ + uint16_t lo, hi; + + lo = stoeplitz_cache_entry(scache, h16); + hi = stoeplitz_cache_entry(scache, h16 >> 8); + +#if _BYTE_ORDER == _BIG_ENDIAN + return (hi ^ swap16(lo)); +#else + return (swap16(hi) ^ lo); +#endif +} + +/* + * system provided symmetric toeplitz + */ + +#define STOEPLITZ_KEYSEED 0x6d5a + +void stoeplitz_init(void); + +void stoeplitz_to_key(uint8_t *, size_t) + __bounded((__buffer__, 1, 2)); + +extern const struct stoeplitz_cache *const stoeplitz_cache; + +#define stoeplitz_n16(_n16) \ + stoeplitz_cache_n16(stoeplitz_cache, (_n16)) +#define stoeplitz_h16(_h16) \ + stoeplitz_cache_h16(stoeplitz_cache, (_h16)) +#define stoeplitz_port(_p) stoeplitz_n16((_p)) +#define stoeplitz_ip4(_sa4, _da4) \ + stoeplitz_hash_ip4(stoeplitz_cache, (_sa4), (_da4)) +#define stoeplitz_ip4port(_sa4, _da4, _sp, _dp) \ + stoeplitz_hash_ip4port(stoeplitz_cache, (_sa4), (_da4), (_sp), (_dp)) +#ifdef INET6 +#define stoeplitz_ip6(_sa6, _da6) \ + stoeplitz_hash_ip6(stoeplitz_cache, (_sa6), (_da6)) +#define stoeplitz_ip6port(_sa6, _da6, _sp, _dp) \ + stoeplitz_hash_ip6port(stoeplitz_cache, (_sa6), (_da6), (_sp), (_dp)) +#endif + +#endif /* _SYS_NET_TOEPLITZ_H_ */ Index: sys/conf.h =================================================================== RCS file: /cvs/src/sys/sys/conf.h,v retrieving revision 1.152 diff -u -p -r1.152 conf.h --- sys/conf.h 26 May 2020 07:53:00 -0000 1.152 +++ sys/conf.h 10 Jun 2020 08:19:11 -0000 @@ -328,6 +328,13 @@ extern struct cdevsw cdevsw[]; (dev_type_stop((*))) enodev, 0, seltrue, \ (dev_type_mmap((*))) enodev, 0, 0, seltrue_kqfilter } +/* open, close, ioctl */ +#define cdev_kstat_init(c,n) { \ + dev_init(c,n,open), dev_init(c,n,close), (dev_type_read((*))) enodev, \ + (dev_type_write((*))) enodev, dev_init(c,n,ioctl), \ + (dev_type_stop((*))) enodev, 0, selfalse, \ + (dev_type_mmap((*))) enodev } + /* open, close, read, write, ioctl, stop, tty, poll, mmap, kqfilter */
#define cdev_wsdisplay_init(c,n) { \ dev_init(c,n,open), dev_init(c,n,close), dev_init(c,n,read), \ @@ -605,6 +612,7 @@ cdev_decl(wsmouse); cdev_decl(wsmux); cdev_decl(ksyms); +cdev_decl(kstat); cdev_decl(bio); cdev_decl(vscsi); Index: sys/kstat.h =================================================================== RCS file: sys/kstat.h diff -N sys/kstat.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/kstat.h 10 Jun 2020 08:19:11 -0000 @@ -0,0 +1,195 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2020 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */ + +#ifndef _SYS_KSTAT_H_ +#define _SYS_KSTAT_H_ + +#include + +#define KSTAT_STRLEN 32 + +#define KSTAT_T_RAW 0 +#define KSTAT_T_KV 1 +#define KSTAT_T_COUNTERS 2 + +struct kstat_req { + unsigned int ks_rflags; +#define KSTATIOC_F_IGNVER (1 << 0) + /* the current version of the kstat subsystem */ + unsigned int ks_version; + + uint64_t ks_id; + + char ks_provider[KSTAT_STRLEN]; + unsigned int ks_instance; + char ks_name[KSTAT_STRLEN]; + unsigned int ks_unit; + + struct timespec ks_created; + struct timespec ks_updated; + struct timespec ks_interval; + unsigned int ks_type; + unsigned int ks_state; + + void *ks_data; + size_t ks_datalen; + unsigned int ks_dataver; +}; + +/* ioctls */ + +#define KSTATIOC_VERSION _IOR('k', 1, unsigned int) +#define KSTATIOC_FIND_ID _IOWR('k', 2, struct kstat_req) +#define KSTATIOC_NFIND_ID _IOWR('k', 3, struct kstat_req) +#define KSTATIOC_FIND_PROVIDER _IOWR('k', 4, struct kstat_req) +#define KSTATIOC_NFIND_PROVIDER _IOWR('k', 5, struct kstat_req) +#define KSTATIOC_FIND_NAME _IOWR('k', 6, struct kstat_req) +#define KSTATIOC_NFIND_NAME _IOWR('k', 7, struct kstat_req) + +/* named data */ + +#define KSTAT_KV_NAMELEN 16 +#define KSTAT_KV_ALIGN sizeof(uint64_t) + +enum kstat_kv_type { + KSTAT_KV_T_NULL, + KSTAT_KV_T_BOOL, + KSTAT_KV_T_COUNTER64, + KSTAT_KV_T_COUNTER32, + KSTAT_KV_T_UINT64, + KSTAT_KV_T_INT64, + KSTAT_KV_T_UINT32, + KSTAT_KV_T_INT32, + KSTAT_KV_T_ISTR, /* inline string */ + KSTAT_KV_T_STR, /* trailing string */ + KSTAT_KV_T_BYTES, /* trailing bytes */ +}; + +/* units only apply to integer types */ +enum kstat_kv_unit { + KSTAT_KV_U_NONE = 0, + KSTAT_KV_U_PACKETS, /* packets */ + KSTAT_KV_U_BYTES, /* bytes */ + KSTAT_KV_U_TEMP, /* temperature (uK) */ + KSTAT_KV_U_FANRPM, /* fan revolution speed */ + KSTAT_KV_U_VOLTS_DC, /* voltage (uV DC) */ + KSTAT_KV_U_VOLTS_AC, /* voltage (uV AC) */ + KSTAT_KV_U_OHMS, /* resistance */ + KSTAT_KV_U_WATTS, /* power (uW) */ + KSTAT_KV_U_AMPS, /* current (uA) */ + KSTAT_KV_U_WATTHOUR, /*
power capacity (uWh) */ + KSTAT_KV_U_AMPHOUR, /* power capacity (uAh) */ + KSTAT_KV_U_PERCENT, /* percent (m%) */ + KSTAT_KV_U_LUX, /* illuminance (ulx) */ + KSTAT_KV_U_TIMEDELTA, /* system time error (nSec) */ + KSTAT_KV_U_HUMIDITY, /* humidity (m%RH) */ + KSTAT_KV_U_FREQ, /* frequency (uHz) */ + KSTAT_KV_U_ANGLE, /* angle (uDegrees) */ + KSTAT_KV_U_DISTANCE, /* distance (uMeter) */ + KSTAT_KV_U_PRESSURE, /* pressure (mPa) */ + KSTAT_KV_U_ACCEL, /* acceleration (u m/s^2) */ + KSTAT_KV_U_VELOCITY, /* velocity (u m/s) */ +}; + +struct kstat_kv { + char kv_key[KSTAT_KV_NAMELEN]; + union { + char v_istr[16]; + unsigned int v_bool; + uint64_t v_u64; + int64_t v_s64; + uint32_t v_u32; + int32_t v_s32; + size_t v_len; + } kv_v; + enum kstat_kv_type kv_type; + enum kstat_kv_unit kv_unit; +} __aligned(KSTAT_KV_ALIGN); + +#define KSTAT_KV_UNIT_INITIALIZER(_key, _type, _unit) { \ + .kv_key = (_key), \ + .kv_type = (_type), \ + .kv_unit = (_unit), \ +} + +#define KSTAT_KV_INITIALIZER(_key, _type) \ + KSTAT_KV_UNIT_INITIALIZER((_key), (_type), KSTAT_KV_U_NONE) + +void kstat_kv_init(struct kstat_kv *, const char *, enum kstat_kv_type); +void kstat_kv_unit_init(struct kstat_kv *, const char *, + enum kstat_kv_type, enum kstat_kv_unit); + +#ifdef _KERNEL + +#include + +struct kstat_lock_ops; + +struct kstat { + uint64_t ks_id; + + const char *ks_provider; + unsigned int ks_instance; + const char *ks_name; + unsigned int ks_unit; + + unsigned int ks_type; + unsigned int ks_flags; +#define KSTAT_F_REALLOC (1 << 0) + unsigned int ks_state; +#define KSTAT_S_CREATED 0 +#define KSTAT_S_INSTALLED 1 + + struct timespec ks_created; + RBT_ENTRY(kstat) ks_id_entry; + RBT_ENTRY(kstat) ks_pv_entry; + RBT_ENTRY(kstat) ks_nm_entry; + + /* the driver can update these between kstat creation and install */ + unsigned int ks_dataver; + void *ks_softc; + int (*ks_read)(struct kstat *); + int (*ks_copy)(struct kstat *, void *); + + const struct kstat_lock_ops * + ks_lock_ops; + void *ks_lock; + + /*
the data that is updated by ks_read */ + void *ks_data; + size_t ks_datalen; + struct timespec ks_updated; + struct timespec ks_interval; +}; + +struct kstat *kstat_create(const char *, unsigned int, + const char *, unsigned int, + unsigned int, unsigned int); + +void kstat_set_rlock(struct kstat *, struct rwlock *); +void kstat_set_wlock(struct kstat *, struct rwlock *); +void kstat_set_mutex(struct kstat *, struct mutex *); + +int kstat_read_nop(struct kstat *); + +void kstat_install(struct kstat *); +void kstat_destroy(struct kstat *); + +#endif /* _KERNEL */ + +#endif /* _SYS_KSTAT_H_ */