if_vmx.c: keep the bus_dma state of the tx rings and sync them explicitly.

What this patch does:
  * adds struct vmx_dmamem plus the VMX_DMA_MAP/DVA/KVA/LEN accessors, and
    vmx_dmamem_alloc() to create, allocate, map and load a single-segment
    DMA region (vmx_dmamem_free() is added under "#ifdef notyet");
  * allocates the tx command ring and the tx completion ring through
    vmx_dmamem_alloc() instead of vmxnet3_dma_allocmem();
  * brackets the completion ring walk in vmxnet3_txintr() with
    bus_dmamap_sync() POSTREAD/PREREAD, and bounds the walk by the
    producer index sampled on entry;
  * replaces membar_producer() in vmxnet3_start() with bus_dmamap_sync()
    on the command ring, and publishes ring->prod per packet;
  * records the queue index in the txqueue and uses it for the
    VMXNET3_BAR0_TXH() doorbell write instead of a hard-coded 0.

Review note: in vmxnet3_alloc_txring() a failure of the second
vmx_dmamem_alloc() returns -1 without releasing the first ring.

Formatting note: the line structure below was restored from a copy whose
newlines had been collapsed; every hunk's line counts match its header,
but tab/space runs inside lines were reconstructed.  If context fails to
match, apply with "patch -l".

Index: if_vmx.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_vmx.c,v
retrieving revision 1.70
diff -u -p -r1.70 if_vmx.c
--- if_vmx.c	11 Sep 2022 08:38:39 -0000	1.70
+++ if_vmx.c	29 Jul 2023 03:43:33 -0000
@@ -61,15 +61,29 @@
 #define VMX_RX_GEN		htole32(VMXNET3_RX_GEN_M << VMXNET3_RX_GEN_S)
 #define VMX_RXC_GEN		htole32(VMXNET3_RXC_GEN_M << VMXNET3_RXC_GEN_S)
 
+struct vmx_dmamem {
+	bus_dmamap_t		vdm_map;
+	bus_dma_segment_t	vdm_seg;
+	int			vdm_nsegs;
+	size_t			vdm_size;
+	caddr_t			vdm_kva;
+};
+
+#define VMX_DMA_MAP(_vdm)	((_vdm)->vdm_map)
+#define VMX_DMA_DVA(_vdm)	((_vdm)->vdm_map->dm_segs[0].ds_addr)
+#define VMX_DMA_KVA(_vdm)	((void *)(_vdm)->vdm_kva)
+#define VMX_DMA_LEN(_vdm)	((_vdm)->vdm_size)
+
 struct vmxnet3_softc;
 
 struct vmxnet3_txring {
 	struct mbuf *m[NTXDESC];
 	bus_dmamap_t dmap[NTXDESC];
-	struct vmxnet3_txdesc *txd;
-	u_int32_t gen;
-	u_int prod;
-	u_int cons;
+	struct vmx_dmamem ring;
+
+	uint32_t gen;
+	volatile unsigned int prod;
+	volatile unsigned int cons;
 };
 
 struct vmxnet3_rxring {
@@ -86,6 +100,7 @@ struct vmxnet3_rxring {
 };
 
 struct vmxnet3_comp_ring {
+	struct vmx_dmamem ring;
 	union {
 		struct vmxnet3_txcompdesc *txcd;
 		struct vmxnet3_rxcompdesc *rxcd;
@@ -101,6 +116,7 @@ struct vmxnet3_txqueue {
 	struct vmxnet3_txq_shared *ts;
 	struct ifqueue *ifq;
 	struct kstat *txkstat;
+	unsigned int id;
 } __aligned(64);
 
 struct vmxnet3_rxqueue {
@@ -193,6 +209,12 @@ void vmxnet3_media_status(struct ifnet *
 int vmxnet3_media_change(struct ifnet *);
 void *vmxnet3_dma_allocmem(struct vmxnet3_softc *, u_int, u_int, bus_addr_t *);
 
+static int	vmx_dmamem_alloc(struct vmxnet3_softc *, struct vmx_dmamem *,
+		    bus_size_t, u_int);
+#ifdef notyet
+static void	vmx_dmamem_free(struct vmxnet3_softc *, struct vmx_dmamem *);
+#endif
+
 #if NKSTAT > 0
 static void	vmx_kstat_init(struct vmxnet3_softc *);
 static void	vmx_kstat_txstats(struct vmxnet3_softc *,
@@ -532,16 +554,17 @@ vmxnet3_alloc_txring(struct vmxnet3_soft
 	struct vmxnet3_txq_shared *ts;
 	struct vmxnet3_txring *ring = &tq->cmd_ring;
 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
-	bus_addr_t pa, comp_pa;
 	int idx;
 
-	ring->txd = vmxnet3_dma_allocmem(sc, NTXDESC * sizeof ring->txd[0], 512, &pa);
-	if (ring->txd == NULL)
+	tq->id = queue;
+
+	if (vmx_dmamem_alloc(sc, &ring->ring,
+	    NTXDESC * sizeof(struct vmxnet3_txdesc), 512) != 0)
 		return -1;
-	comp_ring->txcd = vmxnet3_dma_allocmem(sc,
-	    NTXCOMPDESC * sizeof comp_ring->txcd[0], 512, &comp_pa);
-	if (comp_ring->txcd == NULL)
+	if (vmx_dmamem_alloc(sc, &comp_ring->ring,
+	    NTXCOMPDESC * sizeof(comp_ring->txcd[0]), 512) != 0)
 		return -1;
+	comp_ring->txcd = VMX_DMA_KVA(&comp_ring->ring);
 
 	for (idx = 0; idx < NTXDESC; idx++) {
 		if (bus_dmamap_create(sc->sc_dmat, JUMBO_LEN, NTXSEGS,
@@ -553,9 +576,9 @@ vmxnet3_alloc_txring(struct vmxnet3_soft
 	bzero(ts, sizeof *ts);
 	ts->npending = 0;
 	ts->intr_threshold = 1;
-	ts->cmd_ring = pa;
+	ts->cmd_ring = VMX_DMA_DVA(&ring->ring);
 	ts->cmd_ring_len = NTXDESC;
-	ts->comp_ring = comp_pa;
+	ts->comp_ring = VMX_DMA_DVA(&comp_ring->ring);
 	ts->comp_ring_len = NTXCOMPDESC;
 	ts->driver_data = ~0ULL;
 	ts->driver_data_len = 0;
@@ -627,8 +650,8 @@ vmxnet3_txinit(struct vmxnet3_softc *sc,
 	ring->gen = VMX_TX_GEN;
 	comp_ring->next = 0;
 	comp_ring->gen = VMX_TXC_GEN;
-	bzero(ring->txd, NTXDESC * sizeof ring->txd[0]);
-	bzero(comp_ring->txcd, NTXCOMPDESC * sizeof comp_ring->txcd[0]);
+	memset(VMX_DMA_KVA(&ring->ring), 0, VMX_DMA_LEN(&ring->ring));
+	memset(VMX_DMA_KVA(&comp_ring->ring), 0, VMX_DMA_LEN(&comp_ring->ring));
 }
 
 void
@@ -924,18 +947,22 @@ vmxnet3_txintr(struct vmxnet3_softc *sc,
 	struct vmxnet3_txcompdesc *txcd;
 	bus_dmamap_t map;
 	struct mbuf *m;
-	u_int cons, next;
+	u_int prod, cons, next;
 	uint32_t rgen;
 
+	prod = ring->prod;
 	cons = ring->cons;
-	if (cons == ring->prod)
+
+	if (cons == prod)
 		return;
 
 	next = comp_ring->next;
 	rgen = comp_ring->gen;
 
-	/* postread */
-	for (;;) {
+	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->ring),
+	    0, VMX_DMA_LEN(&comp_ring->ring), BUS_DMASYNC_POSTREAD);
+
+	do {
 		txcd = &comp_ring->txcd[next];
 		if ((txcd->txc_word3 & VMX_TXC_GEN) != rgen)
 			break;
@@ -958,8 +985,10 @@ vmxnet3_txintr(struct vmxnet3_softc *sc,
 		    VMXNET3_TXC_EOPIDX_M;
 		cons++;
 		cons %= NTXDESC;
-	}
-	/* preread */
+	} while (cons != prod);
+
+	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->ring),
+	    0, VMX_DMA_LEN(&comp_ring->ring), BUS_DMASYNC_PREREAD);
 
 	comp_ring->next = next;
 	comp_ring->gen = rgen;
@@ -1323,6 +1352,7 @@ vmxnet3_start(struct ifqueue *ifq)
 	struct vmxnet3_softc *sc = ifp->if_softc;
 	struct vmxnet3_txqueue *tq = ifq->ifq_softc;
 	struct vmxnet3_txring *ring = &tq->cmd_ring;
+	struct vmxnet3_txdesc *txr;
 	struct vmxnet3_txdesc *txd, *sop;
 	bus_dmamap_t map;
 	unsigned int prod, free, i;
@@ -1337,6 +1367,10 @@ vmxnet3_start(struct ifqueue *ifq)
 		free += NTXDESC;
 	free -= prod;
 
+	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->ring),
+	    0, VMX_DMA_LEN(&ring->ring), BUS_DMASYNC_POSTWRITE);
+
+	txr = VMX_DMA_KVA(&ring->ring);
 	rgen = ring->gen;
 
 	for (;;) {
@@ -1368,9 +1402,9 @@ vmxnet3_start(struct ifqueue *ifq)
 		    map->dm_mapsize, BUS_DMASYNC_PREWRITE);
 
 		gen = rgen ^ VMX_TX_GEN;
-		sop = &ring->txd[prod];
+		sop = &txr[prod];
 		for (i = 0; i < map->dm_nsegs; i++) {
-			txd = &ring->txd[prod];
+			txd = &txr[prod];
 			txd->tx_addr = htole64(map->dm_segs[i].ds_addr);
 			txd->tx_word2 = htole32(map->dm_segs[i].ds_len <<
 			    VMXNET3_TX_LEN_S) | gen;
@@ -1386,26 +1420,33 @@ vmxnet3_start(struct ifqueue *ifq)
 		txd->tx_word3 = htole32(VMXNET3_TX_EOP | VMXNET3_TX_COMPREQ);
 
 		if (ISSET(m->m_flags, M_VLANTAG)) {
-			sop->tx_word3 |= htole32(VMXNET3_TX_VTAG_MODE);
-			sop->tx_word3 |= htole32((m->m_pkthdr.ether_vtag &
-			    VMXNET3_TX_VLANTAG_M) << VMXNET3_TX_VLANTAG_S);
+			uint32_t vtag = VMXNET3_TX_VTAG_MODE;
+			vtag |= (m->m_pkthdr.ether_vtag & VMXNET3_TX_VLANTAG_M)
+			    << VMXNET3_TX_VLANTAG_S;
+
+			sop->tx_word3 |= htole32(vtag);
 		}
 
+		ring->prod = prod;
 		/* Change the ownership by flipping the "generation" bit */
-		membar_producer();
+		bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->ring),
+		    0, VMX_DMA_LEN(&ring->ring),
+		    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_POSTWRITE);
 		sop->tx_word2 ^= VMX_TX_GEN;
 
 		free -= i;
 		post = 1;
 	}
 
+	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->ring),
+	    0, VMX_DMA_LEN(&ring->ring), BUS_DMASYNC_PREWRITE);
+
 	if (!post)
 		return;
 
-	ring->prod = prod;
 	ring->gen = rgen;
 
-	WRITE_BAR0(sc, VMXNET3_BAR0_TXH(0), prod);
+	WRITE_BAR0(sc, VMXNET3_BAR0_TXH(tq->id), prod);
 }
 
 void
@@ -1468,6 +1509,49 @@ vmxnet3_dma_allocmem(struct vmxnet3_soft
 	bus_dmamap_destroy(t, map);
 	return va;
 }
+
+static int
+vmx_dmamem_alloc(struct vmxnet3_softc *sc, struct vmx_dmamem *vdm,
+    bus_size_t size, u_int align)
+{
+	vdm->vdm_size = size;
+
+	if (bus_dmamap_create(sc->sc_dmat, vdm->vdm_size, 1,
+	    vdm->vdm_size, 0,
+	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
+	    &vdm->vdm_map) != 0)
+		return (1);
+	if (bus_dmamem_alloc(sc->sc_dmat, vdm->vdm_size,
+	    align, 0, &vdm->vdm_seg, 1, &vdm->vdm_nsegs,
+	    BUS_DMA_WAITOK | BUS_DMA_ZERO) != 0)
+		goto destroy;
+	if (bus_dmamem_map(sc->sc_dmat, &vdm->vdm_seg, vdm->vdm_nsegs,
+	    vdm->vdm_size, &vdm->vdm_kva, BUS_DMA_WAITOK) != 0)
+		goto free;
+	if (bus_dmamap_load(sc->sc_dmat, vdm->vdm_map, vdm->vdm_kva,
+	    vdm->vdm_size, NULL, BUS_DMA_WAITOK) != 0)
+		goto unmap;
+
+	return (0);
+unmap:
+	bus_dmamem_unmap(sc->sc_dmat, vdm->vdm_kva, vdm->vdm_size);
+free:
+	bus_dmamem_free(sc->sc_dmat, &vdm->vdm_seg, 1);
+destroy:
+	bus_dmamap_destroy(sc->sc_dmat, vdm->vdm_map);
+	return (1);
+}
+
+#ifdef notyet
+static void
+vmx_dmamem_free(struct vmxnet3_softc *sc, struct vmx_dmamem *vdm)
+{
+	bus_dmamap_unload(sc->sc_dmat, vdm->vdm_map);
+	bus_dmamem_unmap(sc->sc_dmat, vdm->vdm_kva, vdm->vdm_size);
+	bus_dmamem_free(sc->sc_dmat, &vdm->vdm_seg, 1);
+	bus_dmamap_destroy(sc->sc_dmat, vdm->vdm_map);
+}
+#endif
 
 #if NKSTAT > 0
 /*