Index: if_mcx.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_mcx.c,v
retrieving revision 1.95
diff -u -p -r1.95 if_mcx.c
--- if_mcx.c	25 Jan 2021 01:45:55 -0000	1.95
+++ if_mcx.c	25 Jan 2021 05:17:04 -0000
@@ -33,6 +33,7 @@
 #include
 #include
 #include
+#include
 #include
 
 #include
@@ -85,10 +86,10 @@
 #define MCX_LOG_RQ_SIZE		10
 #define MCX_LOG_SQ_SIZE		11
 
-#define MCX_MAX_QUEUES		1
+#define MCX_MAX_QUEUES		16
 
 /* completion event moderation - about 10khz, or 90% of the cq */
-#define MCX_CQ_MOD_PERIOD	50
+#define MCX_CQ_MOD_PERIOD	5
 #define MCX_CQ_MOD_COUNTER	\
 	(((1 << (MCX_LOG_CQ_SIZE - 1)) * 9) / 10)
 
@@ -162,6 +163,8 @@ CTASSERT(MCX_MAX_QUEUES * MCX_WQ_DOORBEL
 #define MCX_REG_PAOS		0x5006
 #define MCX_REG_PFCC		0x5007
 #define MCX_REG_PPCNT		0x5008
+#define MCX_REG_PPTB		0x500b
+#define MCX_REG_PBMC		0x500c
 #define MCX_REG_MTCAP		0x9009 /* mgmt temp capabilities */
 #define MCX_REG_MTMP		0x900a /* mgmt temp */
 #define MCX_REG_MCIA		0x9014
@@ -587,6 +590,40 @@ struct mcx_reg_mcam {
 
 #define MCX_MCAM_FEATURE_CAP_SENSOR_MAP	6
 
+struct mcx_reg_pptb {
+	uint8_t			pptb_mm;
+	uint8_t			pptb_local_port;
+	uint8_t			pptb_um_cm;
+	uint8_t			pptb_prio_buff_mask;
+
+	uint8_t			pptb_prio_buff[4];
+
+	uint8_t			pptb_pm_msb;
+	uint8_t			_reserved1[2];
+	uint8_t			pptb_ctrl_untagged_buff;
+} __packed __aligned(4);
+
+struct mcx_reg_pbmc {
+	uint8_t			_reserved1[1];
+	uint8_t			pbmc_local_port;
+	uint8_t			_reserved2[2];
+
+	uint16_t		pbmc_xoff_timer;
+	uint16_t		pbmc_xoff_refresh;
+
+	uint8_t			_reserved3[1];
+	uint8_t			pbmc_full_thresh;
+	uint16_t		pbmc_port_buffer_size;
+
+	struct {
+		uint8_t			pbmcb_lossy_epsb;
+		uint8_t			_reserved1[1];
+		uint16_t		pbmcb_size;
+		uint16_t		pbmcb_xoff_thresh;
+		uint16_t		pbmcb_xon_thresh;
+	}			pbmc_port_buffer[10];
+} __packed __aligned(4);
+
 struct mcx_reg_mtcap {
 	uint8_t			_reserved1[3];
 	uint8_t			mtcap_sensor_count;
@@ -2266,6 +2303,8 @@ struct mcx_cq {
 	bus_addr_t		cq_doorbell;
 	uint32_t		cq_cons;
 	uint32_t		cq_count;
+
+	uint64_t		cq_uval;
 };
 
 struct mcx_calibration {
@@ -2479,7 +2518,7 @@ struct mcx_softc {
 	uint32_t		sc_khz;
 
 	struct intrmap		*sc_intrmap;
-	struct mcx_queues	sc_queues[MCX_MAX_QUEUES];
+	struct mcx_queues	*sc_queues;
 
 	int			sc_mcam_reg;
 
@@ -2557,6 +2596,8 @@ static int mcx_set_flow_table_entry_prot
 		    int, int, uint32_t);
 static int	mcx_delete_flow_table_entry(struct mcx_softc *, int, int);
 
+static void	mcx_print_buffer_stuff(struct mcx_softc *);
+
 #if NKSTAT > 0
 static int	mcx_query_rq(struct mcx_softc *, struct mcx_rx *, struct mcx_rq_ctx *);
 static int	mcx_query_sq(struct mcx_softc *, struct mcx_tx *, struct mcx_sq_ctx *);
@@ -2878,6 +2919,8 @@ mcx_attach(struct device *parent, struct
 	printf(", %s, address %s\n", intrstr,
 	    ether_sprintf(sc->sc_ac.ac_enaddr));
 
+	mcx_print_buffer_stuff(sc);
+
 	msix = pci_intr_msix_count(pa->pa_pc, pa->pa_tag);
 	sc->sc_intrmap = intrmap_create(&sc->sc_dev, msix, MCX_MAX_QUEUES,
 	    INTRMAP_POWEROF2);
@@ -2885,6 +2928,12 @@ mcx_attach(struct device *parent, struct
 		printf("%s: unable to create interrupt map\n", DEVNAME(sc));
 		goto teardown;
 	}
+	sc->sc_queues = mallocarray(intrmap_count(sc->sc_intrmap),
+	    sizeof(*sc->sc_queues), M_DEVBUF, M_WAITOK|M_ZERO);
+	if (sc->sc_queues == NULL) {
+		printf("%s: unable to create queues\n", DEVNAME(sc));
+		goto intrunmap;
+	}
 
 	strlcpy(ifp->if_xname, DEVNAME(sc), IFNAMSIZ);
 	ifp->if_softc = sc;
@@ -2996,6 +3045,9 @@ intrdisestablish:
 		pci_intr_disestablish(sc->sc_pc, q->q_ihc);
 		q->q_ihc = NULL;
 	}
+	free(sc->sc_queues, M_DEVBUF,
+	    intrmap_count(sc->sc_intrmap) * sizeof(*sc->sc_queues));
+intrunmap:
 	intrmap_destroy(sc->sc_intrmap);
 	sc->sc_intrmap = NULL;
 teardown:
@@ -3544,6 +3596,56 @@ free:
 	return (error);
 }
 
+/*
+ * Debug aid: read the PPTB and PBMC registers for local port 1 and
+ * print their contents. A failed register read is reported and skipped.
+ */
+static void
+mcx_print_buffer_stuff(struct mcx_softc *sc)
+{
+	struct mcx_reg_pptb pptb;
+	struct mcx_reg_pbmc pbmc;
+	unsigned int i;
+	int error;
+
+	memset(&pptb, 0, sizeof(pptb));
+	pptb.pptb_local_port = 1;
+	error = mcx_access_hca_reg(sc, MCX_REG_PPTB, MCX_REG_OP_READ, &pptb,
+	    sizeof(pptb));
+	if (error != 0) {
+		printf("%s: unable to get port prio buffer mapping\n",
+		    DEVNAME(sc));
+		return;
+	}
+
+	memset(&pbmc, 0, sizeof(pbmc));
+	pbmc.pbmc_local_port = 1;
+	error = mcx_access_hca_reg(sc, MCX_REG_PBMC, MCX_REG_OP_READ, &pbmc,
+	    sizeof(pbmc));
+	if (error != 0) {
+		printf("%s: unable to get port buffer mgmt control\n",
+		    DEVNAME(sc));
+		return;
+	}
+
+	printf("pptb: pm %x, prio %x %x %x %x\n", pptb.pptb_prio_buff_mask,
+	    pptb.pptb_prio_buff[0], pptb.pptb_prio_buff[1],
+	    pptb.pptb_prio_buff[2], pptb.pptb_prio_buff[3]);
+
+	/* multi-byte fields come back from the device in big-endian order */
+	printf("pbmc: xoff timer %x refresh %x, ft %x, buffer size %d\n",
+	    betoh16(pbmc.pbmc_xoff_timer), betoh16(pbmc.pbmc_xoff_refresh),
+	    pbmc.pbmc_full_thresh, betoh16(pbmc.pbmc_port_buffer_size));
+
+	for (i = 0; i < nitems(pbmc.pbmc_port_buffer); i++) {
+		printf("pbmc%u: lossy/epsb %x size %d, xoff %x xon %x\n", i,
+		    pbmc.pbmc_port_buffer[i].pbmcb_lossy_epsb,
+		    betoh16(pbmc.pbmc_port_buffer[i].pbmcb_size),
+		    betoh16(pbmc.pbmc_port_buffer[i].pbmcb_xoff_thresh),
+		    betoh16(pbmc.pbmc_port_buffer[i].pbmcb_xon_thresh));
+	}
+}
+
 static int
 mcx_set_issi(struct mcx_softc *sc, struct mcx_cmdq_entry *cqe,
     unsigned int slot)
@@ -6935,9 +7037,6 @@ mcx_process_cq(struct mcx_softc *sc, str
 	bus_dmamap_sync(sc->sc_dmat, MCX_DMA_MAP(&cq->cq_mem),
 	    0, MCX_DMA_LEN(&cq->cq_mem), BUS_DMASYNC_PREREAD);
 
-	cq->cq_count++;
-	mcx_arm_cq(sc, cq, q->q_uar);
-
 	if (rxfree > 0) {
 		if_rxr_put(&rx->rx_rxr, rxfree);
 		if (ifiq_input(rx->rx_ifiq, &ml))
@@ -6947,6 +7046,10 @@ mcx_process_cq(struct mcx_softc *sc, str
 		if (if_rxr_inuse(&rx->rx_rxr) == 0)
 			timeout_add(&rx->rx_refill, 1);
 	}
+
+	cq->cq_count++;
+	mcx_arm_cq(sc, cq, q->q_uar);
+
 	if (txfree > 0) {
 		tx->tx_cons += txfree;
 		if (ifq_is_oactive(tx->tx_ifq))
@@ -6954,7 +7057,6 @@ mcx_process_cq(struct mcx_softc *sc, str
 	}
 }
 
-
 static void
 mcx_arm_eq(struct mcx_softc *sc, struct mcx_eq *eq, int uar)
 {
@@ -8552,9 +8654,13 @@ struct mcx_queuestat {
 };
 
 static const struct mcx_queuestat mcx_queue_kstat_tpl[] = {
-	{ "RQ SW prod",		KSTAT_KV_T_COUNTER64 },
-	{ "RQ HW prod",		KSTAT_KV_T_COUNTER64 },
-	{ "RQ HW cons",		KSTAT_KV_T_COUNTER64 },
+	{ "RQ SW prod",		KSTAT_KV_T_COUNTER32 },
+	{ "RQ SW prod m",	KSTAT_KV_T_UINT32 },
+	{ "RQ HW prod",		KSTAT_KV_T_UINT32 },
+	{ "RQ HW prod m",	KSTAT_KV_T_UINT32 },
+	{ "RQ HW cons",		KSTAT_KV_T_UINT32 },
+	{ "RQ HW cons m",	KSTAT_KV_T_UINT32 },
+	{ "RQ HW lwm",		KSTAT_KV_T_UINT32 },
 	{ "RQ HW state",	KSTAT_KV_T_ISTR },
 
 	{ "SQ SW prod",		KSTAT_KV_T_COUNTER64 },
@@ -8563,13 +8669,32 @@ static const struct mcx_queuestat mcx_qu
 	{ "SQ HW cons",		KSTAT_KV_T_COUNTER64 },
 	{ "SQ HW state",	KSTAT_KV_T_ISTR },
 
-	{ "CQ SW cons",		KSTAT_KV_T_COUNTER64 },
-	{ "CQ HW prod",		KSTAT_KV_T_COUNTER64 },
-	{ "CQ HW cons",		KSTAT_KV_T_COUNTER64 },
-	{ "CQ HW notify",	KSTAT_KV_T_COUNTER64 },
-	{ "CQ HW solicit",	KSTAT_KV_T_COUNTER64 },
+	{ "CQ SW cons",		KSTAT_KV_T_COUNTER32 },
+	{ "CQ SW cons next",	KSTAT_KV_T_UINT32 },
+	{ "CQ SW cons m",	KSTAT_KV_T_UINT32 },
+	{ "CQ SW cons owner",	KSTAT_KV_T_UINT32 },
+	{ "CQ SW count",	KSTAT_KV_T_COUNTER32 },
+	{ "CQ SW count sn",	KSTAT_KV_T_UINT32 },
+
+	{ "CQ HW prod",		KSTAT_KV_T_UINT32 },
+	{ "CQ HW prod m",	KSTAT_KV_T_UINT32 },
+	{ "CQ HW cons",		KSTAT_KV_T_UINT32 },
+	{ "CQ HW cons m",	KSTAT_KV_T_UINT32 },
+	{ "CQ HW diff",		KSTAT_KV_T_UINT32 },
+	{ "CQ HW notify",	KSTAT_KV_T_UINT32 },
+	{ "CQ HW notify m",	KSTAT_KV_T_UINT32 },
+	{ "CQ HW solicit",	KSTAT_KV_T_UINT32 },
+	{ "CQ HW solicit m",	KSTAT_KV_T_UINT32 },
 	{ "CQ HW status",	KSTAT_KV_T_ISTR },
 	{ "CQ HW state",	KSTAT_KV_T_ISTR },
+	{ "CQ DB[0]",		KSTAT_KV_T_UINT32 },
+	{ "CQ DB[1]",		KSTAT_KV_T_UINT32 },
+	{ "CQ DB[1] arm",	KSTAT_KV_T_UINT32 },
+	{ "CQ DB[1] sn",	KSTAT_KV_T_UINT32 },
+	{ "CQ uval",		KSTAT_KV_T_UINT64 },
+	{ "CQ uval hi",		KSTAT_KV_T_UINT32 },
+	{ "CQ uval lo",		KSTAT_KV_T_UINT32 },
+	{ "CQ next",		KSTAT_KV_T_BOOL },
 
 	{ "EQ SW cons",		KSTAT_KV_T_COUNTER64 },
 	{ "EQ HW prod",		KSTAT_KV_T_COUNTER64 },
@@ -8620,6 +8745,9 @@ mcx_kstat_queue_read(struct kstat *ks)
 {
 	struct mcx_queues *q = ks->ks_softc;
 	struct mcx_softc *sc = q->q_sc;
+	struct mcx_cq *cq = &q->q_cq;
+	struct mcx_cq_doorbell *db = MCX_DMA_OFF(&sc->sc_doorbell_mem,
+	    cq->cq_doorbell);
 	struct kstat_kv *kvs = ks->ks_data;
 	union {
 		struct mcx_rq_ctx rq;
@@ -8637,9 +8765,15 @@ mcx_kstat_queue_read(struct kstat *ks)
 		goto out;
 	}
 
-	kstat_kv_u64(kvs++) = q->q_rx.rx_prod;
-	kstat_kv_u64(kvs++) = bemtoh32(&u.rq.rq_wq.wq_sw_counter);
-	kstat_kv_u64(kvs++) = bemtoh32(&u.rq.rq_wq.wq_hw_counter);
+	kstat_kv_u32(kvs++) = q->q_rx.rx_prod;
+	kstat_kv_u32(kvs++) = q->q_rx.rx_prod % (1 << MCX_LOG_RQ_SIZE);
+	kstat_kv_u32(kvs++) = bemtoh32(&u.rq.rq_wq.wq_sw_counter);
+	kstat_kv_u32(kvs++) = bemtoh32(&u.rq.rq_wq.wq_sw_counter) %
+	    (1 << MCX_LOG_RQ_SIZE);
+	kstat_kv_u32(kvs++) = bemtoh32(&u.rq.rq_wq.wq_hw_counter);
+	kstat_kv_u32(kvs++) = bemtoh32(&u.rq.rq_wq.wq_hw_counter) %
+	    (1 << MCX_LOG_RQ_SIZE);
+	kstat_kv_u32(kvs++) = bemtoh32(&u.rq.rq_wq.wq_lwm);
 	switch ((bemtoh32(&u.rq.rq_flags) & MCX_RQ_CTX_STATE_MASK) >>
 	    MCX_RQ_CTX_STATE_SHIFT) {
 	case MCX_RQ_CTX_STATE_RST:
@@ -8690,11 +8824,31 @@ mcx_kstat_queue_read(struct kstat *ks)
 		goto out;
 	}
 
-	kstat_kv_u64(kvs++) = q->q_cq.cq_cons;
-	kstat_kv_u64(kvs++) = bemtoh32(&u.cq.cq_producer_counter);
-	kstat_kv_u64(kvs++) = bemtoh32(&u.cq.cq_consumer_counter);
-	kstat_kv_u64(kvs++) = bemtoh32(&u.cq.cq_last_notified);
-	kstat_kv_u64(kvs++) = bemtoh32(&u.cq.cq_last_solicit);
+	kstat_kv_u32(kvs++) = cq->cq_cons;
+	kstat_kv_u32(kvs++) = cq->cq_cons % (1 << MCX_LOG_CQ_SIZE);
+	kstat_kv_u32(kvs++) = cq->cq_cons & MCX_CQ_DOORBELL_ARM_CI_MASK;
+	kstat_kv_u32(kvs++) = (cq->cq_cons >> MCX_LOG_CQ_SIZE) & 1;
+	kstat_kv_u32(kvs++) = cq->cq_count;
+	kstat_kv_u32(kvs++) = cq->cq_count & 3;
+
+	kstat_kv_u32(kvs++) = bemtoh32(&u.cq.cq_producer_counter) &
+	    MCX_CQ_DOORBELL_ARM_CI_MASK;
+	kstat_kv_u32(kvs++) = bemtoh32(&u.cq.cq_producer_counter) %
+	    (1 << MCX_LOG_CQ_SIZE);
+	kstat_kv_u32(kvs++) = bemtoh32(&u.cq.cq_consumer_counter) &
+	    MCX_CQ_DOORBELL_ARM_CI_MASK;
+	kstat_kv_u32(kvs++) = bemtoh32(&u.cq.cq_consumer_counter) %
+	    (1 << MCX_LOG_CQ_SIZE);
+	kstat_kv_u32(kvs++) = ((bemtoh32(&u.cq.cq_producer_counter) << 8) -
+	    (bemtoh32(&u.cq.cq_consumer_counter) << 8)) >> 8;
+	kstat_kv_u32(kvs++) = bemtoh32(&u.cq.cq_last_notified) &
+	    MCX_CQ_DOORBELL_ARM_CI_MASK;
+	kstat_kv_u32(kvs++) = bemtoh32(&u.cq.cq_last_notified) %
+	    (1 << MCX_LOG_CQ_SIZE);
+	kstat_kv_u32(kvs++) = bemtoh32(&u.cq.cq_last_solicit) &
+	    MCX_CQ_DOORBELL_ARM_CI_MASK;
+	kstat_kv_u32(kvs++) = bemtoh32(&u.cq.cq_last_solicit) %
+	    (1 << MCX_LOG_CQ_SIZE);
 
 	switch ((bemtoh32(&u.cq.cq_status) & MCX_CQ_CTX_STATUS_MASK) >>
 	    MCX_CQ_CTX_STATUS_SHIFT) {
@@ -8731,6 +8885,15 @@ mcx_kstat_queue_read(struct kstat *ks)
 	}
 	strlcpy(kstat_kv_istr(kvs), text, sizeof(kstat_kv_istr(kvs)));
 	kvs++;
+	kstat_kv_u32(kvs++) = bemtoh32(&db->db_update_ci);
+	kstat_kv_u32(kvs++) = bemtoh32(&db->db_arm_ci);
+	kstat_kv_u32(kvs++) = bemtoh32(&db->db_arm_ci) &
+	    MCX_CQ_DOORBELL_ARM_CI_MASK;
+	kstat_kv_u32(kvs++) = (bemtoh32(&db->db_arm_ci) >> 28) & 3;
+	kstat_kv_u64(kvs++) = q->q_cq.cq_uval;
+	kstat_kv_u32(kvs++) = q->q_cq.cq_uval >> 32;
+	kstat_kv_u32(kvs++) = q->q_cq.cq_uval;
+	kstat_kv_bool(kvs++) = mcx_next_cq_entry(sc, cq) != NULL;
 
 	if (mcx_query_eq(sc, &q->q_eq, &u.eq) != 0) {
 		error = EIO;