Index: sys/mbuf.h =================================================================== RCS file: /cvs/src/sys/sys/mbuf.h,v retrieving revision 1.242 diff -u -p -r1.242 mbuf.h --- sys/mbuf.h 11 Feb 2019 00:25:33 -0000 1.242 +++ sys/mbuf.h 7 Jun 2019 03:27:41 -0000 @@ -104,7 +104,7 @@ struct pkthdr_pf { u_int8_t flags; u_int8_t routed; u_int8_t prio; - u_int8_t pad[1]; + u_int8_t ecn; /* NOTE(review): nothing else in this diff reads or writes ecn, confirm it belongs to this change */ }; /* pkthdr_pf.flags */ @@ -226,13 +226,14 @@ struct mbuf { #define M_ICMP_CSUM_IN_OK 0x0400 /* ICMP/ICMPv6 checksum verified */ #define M_ICMP_CSUM_IN_BAD 0x0800 /* ICMP/ICMPv6 checksum bad */ #define M_IPV6_DF_OUT 0x1000 /* don't fragment outgoing IPv6 */ +#define M_TIMESTAMP 0x2000 /* ph_timestamp is set */ #ifdef _KERNEL #define MCS_BITS \ ("\20\1IPV4_CSUM_OUT\2TCP_CSUM_OUT\3UDP_CSUM_OUT\4IPV4_CSUM_IN_OK" \ "\5IPV4_CSUM_IN_BAD\6TCP_CSUM_IN_OK\7TCP_CSUM_IN_BAD\10UDP_CSUM_IN_OK" \ "\11UDP_CSUM_IN_BAD\12ICMP_CSUM_OUT\13ICMP_CSUM_IN_OK\14ICMP_CSUM_IN_BAD" \ - "\15IPV6_NODF_OUT") + "\15IPV6_NODF_OUT" "\16TIMESTAMP") #endif /* mbuf types */ Index: sys/time.h =================================================================== RCS file: /cvs/src/sys/sys/time.h,v retrieving revision 1.41 diff -u -p -r1.41 time.h --- sys/time.h 3 Jun 2019 01:27:30 -0000 1.41 +++ sys/time.h 7 Jun 2019 03:27:41 -0000 @@ -333,6 +333,20 @@ void clock_secs_to_ymdhms(time_t, struct /* Traditional POSIX base year */ #define POSIX_BASE_YEAR 1970 +static __inline void +ns_to_microtime(struct timeval *tv, uint64_t ns) +{ /* split a nanosecond count into whole seconds and microseconds */ + tv->tv_sec = ns / 1000000000L; + tv->tv_usec = (ns % 1000000000L) / 1000; /* the remainder below one microsecond is dropped */ +} + +static __inline void +ns_to_nanotime(struct timespec *tv, uint64_t ns) +{ /* split a nanosecond count into whole seconds and the leftover nanoseconds */ + tv->tv_sec = ns / 1000000000L; + tv->tv_nsec = ns % 1000000000L; +} + #else /* !_KERNEL */ #include Index: net/bpf.c =================================================================== RCS file: /cvs/src/sys/net/bpf.c,v retrieving revision 1.175 diff -u -p -r1.175 bpf.c --- net/bpf.c 18 May 2019 12:59:32 -0000 1.175 +++ net/bpf.c 7 Jun 2019
03:27:41 -0000 @@ -1284,13 +1284,25 @@ _bpf_mtap(caddr_t arg, const struct mbuf fcode = bps->bps_bf.bf_insns; slen = bpf_mfilter(fcode, m, pktlen); - if (slen == 0) + if (slen == 0) continue; if (d->bd_fildrop != BPF_FILDROP_PASS) drop = 1; if (d->bd_fildrop != BPF_FILDROP_DROP) { - if (!gottime++) - microtime(&tv); + if (!gottime) { /* fill in tv only once, the first time a descriptor wants the packet */ + if (ISSET(m->m_flags, M_PKTHDR) && + ISSET(m->m_pkthdr.csum_flags, + M_TIMESTAMP)) { + struct timeval btv; + ns_to_microtime(&tv, + m->m_pkthdr.ph_timestamp); /* convert the nanosecond timestamp carried in the mbuf */ + microboottime(&btv); + timeradd(&tv, &btv, &tv); /* then add the boot time to it */ + } else + microtime(&tv); /* no timestamp in the mbuf, read the clock instead */ + + gottime = 1; + } mtx_enter(&d->bd_mtx); bpf_catchpacket(d, (u_char *)m, pktlen, slen, cpfn, Index: netinet/ip_input.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_input.c,v retrieving revision 1.342 diff -u -p -r1.342 ip_input.c --- netinet/ip_input.c 13 Oct 2018 18:36:01 -0000 1.342 +++ netinet/ip_input.c 7 Jun 2019 03:27:41 -0000 @@ -1712,7 +1712,14 @@ ip_savecontrol(struct inpcb *inp, struct if (inp->inp_socket->so_options & SO_TIMESTAMP) { struct timeval tv; - microtime(&tv); + if (ISSET(m->m_pkthdr.csum_flags, M_TIMESTAMP)) { /* same conversion as in _bpf_mtap: mbuf timestamp plus boot time */ + struct timeval btv; + ns_to_microtime(&tv, m->m_pkthdr.ph_timestamp); + microboottime(&btv); + timeradd(&tv, &btv, &tv); + } else + microtime(&tv); /* no timestamp in the mbuf, read the clock instead */ + *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv), SCM_TIMESTAMP, SOL_SOCKET); if (*mp) Index: dev/pci/if_mcx.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_mcx.c,v retrieving revision 1.16 diff -u -p -r1.16 if_mcx.c --- dev/pci/if_mcx.c 4 Jun 2019 05:29:30 -0000 1.16 +++ dev/pci/if_mcx.c 7 Jun 2019 03:27:41 -0000 @@ -1243,20 +1245,22 @@ struct mcx_cmd_destroy_cq_out { } __packed __aligned(4); struct mcx_cq_entry { - uint32_t cq_reserved1; + uint32_t __reserved1__; /* reserved members are numbered: two members of one struct may not share a name */ uint32_t cq_lro; uint32_t cq_lro_ack_seq_num; uint32_t cq_rx_hash; - uint32_t cq_rx_hash_type; + uint8_t cq_rx_hash_type; + uint8_t cq_ml_path; +
uint16_t __reserved2__; uint32_t cq_checksum; - uint32_t cq_reserved2; + uint32_t __reserved3__; uint32_t cq_flags; uint32_t cq_lro_srqn; - uint32_t cq_reserved3[2]; + uint32_t __reserved4__[2]; uint32_t cq_byte_cnt; - uint32_t cq_lro_ts_value; - uint32_t cq_lro_ts_echo; - uint32_t cq_flow_tag; + uint64_t cq_timestamp; /* read with bemtoh64() in mcx_process_rx() */ +#define MCX_CQ_ENTRY_TIMESTAMP_PTP (1ULL << 63) /* 64-bit constant: shifting a plain int by 63 is undefined */ + uint32_t cq_rx_drops; uint16_t cq_wqe_count; uint8_t cq_signature; uint8_t cq_opcode_owner; @@ -1895,6 +1899,18 @@ struct mcx_cq { int cq_count; }; +struct mcx_calibration { + uint64_t c_timestamp; /* previous mcx chip time */ + uint64_t c_uptime; /* previous kernel nanouptime */ + uint64_t c_tbase; /* mcx chip time */ + uint64_t c_ubase; /* kernel nanouptime */ + uint64_t c_tdiff; /* chip time elapsed between the two samples, shifted right by MCX_TIMESTAMP_SHIFT; 0 means not calibrated yet */ + uint64_t c_udiff; /* uptime elapsed between the two samples, shifted right by MCX_TIMESTAMP_SHIFT */ +}; + +#define MCX_CALIBRATE_FIRST 2 /* passed to timeout_add_sec() by mcx_calibrate_first() */ +#define MCX_CALIBRATE_NORMAL 30 /* passed to timeout_add_sec() by mcx_calibrate() */ + struct mcx_softc { struct device sc_dev; struct arpcom sc_ac; @@ -1948,6 +1964,11 @@ struct mcx_softc { int sc_extra_mcast; uint8_t sc_mcast_flows[MCX_NUM_MCAST_FLOWS][ETHER_ADDR_LEN]; + struct mcx_calibration sc_calibration[2]; /* indexed by sc_calibration_gen modulo the array size */ + unsigned int sc_calibration_gen; /* advanced by mcx_calibrate() after a slot has been filled in */ + uint32_t sc_freq; /* NOTE(review): never read or written in this diff, confirm it is needed */ + struct timeout sc_calibrate; /* runs mcx_calibrate() */ + struct mcx_cq sc_cq[MCX_MAX_CQS]; int sc_num_cq; @@ -2039,8 +2060,8 @@ static void mcx_cmdq_dump(const struct m static void mcx_cmdq_mbox_dump(struct mcx_dmamem *, int); */ static void mcx_refill(void *); -static void mcx_process_rx(struct mcx_softc *, struct mcx_cq_entry *, - struct mbuf_list *, int *); +static int mcx_process_rx(struct mcx_softc *, struct mcx_cq_entry *, + struct mbuf_list *, const struct mcx_calibration *); static void mcx_process_txeof(struct mcx_softc *, struct mcx_cq_entry *, int *); static void mcx_process_cq(struct mcx_softc *, struct mcx_cq *); @@ -2060,6 +2081,9 @@ static void mcx_media_status(struct ifne static int mcx_media_change(struct ifnet *); static int mcx_get_sffpage(struct ifnet *, struct if_sffpage *); +static void mcx_calibrate_first(struct mcx_softc *); +static void mcx_calibrate(void *);
+ static inline uint32_t mcx_rd(struct mcx_softc *, bus_size_t); static inline void @@ -2067,6 +2091,8 @@ static inline void static inline void mcx_bar(struct mcx_softc *, bus_size_t, bus_size_t, int); +static uint64_t mcx_timer(struct mcx_softc *); + static int mcx_dmamem_alloc(struct mcx_softc *, struct mcx_dmamem *, bus_size_t, u_int align); static void mcx_dmamem_zero(struct mcx_dmamem *); @@ -2338,6 +2364,7 @@ mcx_attach(struct device *parent, struct ether_ifattach(ifp); timeout_set(&sc->sc_rx_refill, mcx_refill, sc); + timeout_set(&sc->sc_calibrate, mcx_calibrate, sc); /* mcx_calibrate() runs as the sc_calibrate timeout handler */ sc->sc_flow_table_id = -1; for (i = 0; i < MCX_NUM_FLOW_GROUPS; i++) { @@ -5557,9 +5590,65 @@ mcx_process_txeof(struct mcx_softc *sc, ms->ms_m = NULL; } -void +static uint64_t +mcx_uptime(void) +{ /* return nanouptime() folded into a single 64-bit nanosecond count */ + struct timespec ts; + + nanouptime(&ts); + + return ((uint64_t)ts.tv_sec * 1000000000 + (uint64_t)ts.tv_nsec); +} + +static void +mcx_calibrate_first(struct mcx_softc *sc) +{ /* record the first uptime and chip timer pair in slot 0, then schedule mcx_calibrate() */ + struct mcx_calibration *c = &sc->sc_calibration[0]; + + sc->sc_calibration_gen = 0; + + c->c_ubase = mcx_uptime(); + c->c_tbase = mcx_timer(sc); + c->c_tdiff = 0; /* mcx_process_rx() does not stamp packets while c_tdiff is 0 */ + + timeout_add_sec(&sc->sc_calibrate, MCX_CALIBRATE_FIRST); +} + +#define MCX_TIMESTAMP_SHIFT 10 /* right shift applied to both calibration deltas in mcx_calibrate() */ + +static void +mcx_calibrate(void *arg) +{ /* timeout handler: sample uptime and chip timer into the next slot, then publish that slot */ + struct mcx_softc *sc = arg; + struct mcx_calibration *nc, *pc; + unsigned int gen; + + if (!ISSET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING)) + return; /* interface is not running, do not re-arm */ + + timeout_add_sec(&sc->sc_calibrate, MCX_CALIBRATE_NORMAL); /* re-arm for the next run */ + + gen = sc->sc_calibration_gen; + pc = &sc->sc_calibration[gen % nitems(sc->sc_calibration)]; /* slot currently published */ + gen++; + nc = &sc->sc_calibration[gen % nitems(sc->sc_calibration)]; /* slot to fill in */ + + nc->c_uptime = pc->c_ubase; /* the previous sample becomes the start of the new interval */ + nc->c_timestamp = pc->c_tbase; + + nc->c_ubase = mcx_uptime(); + nc->c_tbase = mcx_timer(sc); + + nc->c_udiff = (nc->c_ubase - nc->c_uptime) >> MCX_TIMESTAMP_SHIFT; /* presumably scaled down so the multiply in mcx_process_rx() has headroom, confirm */ + nc->c_tdiff = (nc->c_tbase - nc->c_timestamp) >> MCX_TIMESTAMP_SHIFT; + + membar_producer(); /* slot contents are written before the new generation number */ + sc->sc_calibration_gen = gen; +} + +static int
mcx_process_rx(struct mcx_softc *sc, struct mcx_cq_entry *cqe, - struct mbuf_list *ml, int *slots) + struct mbuf_list *ml, const struct mcx_calibration *c) /* c is the calibration slot picked by mcx_process_cq() */ { struct mcx_slot *ms; struct mbuf *m; @@ -5574,10 +5663,26 @@ mcx_process_rx(struct mcx_softc *sc, str m = ms->ms_m; ms->ms_m = NULL; - m->m_pkthdr.len = m->m_len = betoh32(cqe->cq_byte_cnt); - (*slots)++; + + m->m_pkthdr.len = m->m_len = bemtoh32(&cqe->cq_byte_cnt); + + if (cqe->cq_rx_hash_type) { /* when the hash type is non-zero, use the rx hash as the flow id */ + m->m_pkthdr.ph_flowid = M_FLOWID_VALID | + bemtoh32(&cqe->cq_rx_hash); + } + + if (c->c_tdiff) { /* skipped until a calibration interval exists; also guards the division below */ + uint64_t t = bemtoh64(&cqe->cq_timestamp) - c->c_timestamp; /* chip time since the start of the interval */ + t *= c->c_udiff; /* scale to uptime nanoseconds: t times c_udiff divided by c_tdiff */ + t /= c->c_tdiff; + + m->m_pkthdr.ph_timestamp = c->c_uptime + t; + SET(m->m_pkthdr.csum_flags, M_TIMESTAMP); /* tells _bpf_mtap() and ip_savecontrol() that ph_timestamp is set */ + } ml_enqueue(ml, m); + + return (1); /* the caller adds this to rxfree */ } static struct mcx_cq_entry * @@ -5624,11 +5729,17 @@ void mcx_process_cq(struct mcx_softc *sc, struct mcx_cq *cq) { struct ifnet *ifp = &sc->sc_ac.ac_if; + const struct mcx_calibration *c; + unsigned int gen; struct mcx_cq_entry *cqe; uint8_t *cqp; struct mbuf_list ml = MBUF_LIST_INITIALIZER(); int rxfree, txfree; + gen = sc->sc_calibration_gen; + membar_consumer(); /* pairs with membar_producer() in mcx_calibrate(): generation is read before the slot it selects */ + c = &sc->sc_calibration[gen % nitems(sc->sc_calibration)]; + rxfree = 0; txfree = 0; while ((cqe = mcx_next_cq_entry(sc, cq))) { @@ -5639,7 +5750,7 @@ mcx_process_cq(struct mcx_softc *sc, str mcx_process_txeof(sc, cqe, &txfree); break; case MCX_CQ_ENTRY_OPCODE_SEND: - mcx_process_rx(sc, cqe, &ml, &rxfree); + rxfree += mcx_process_rx(sc, cqe, &ml, c); break; case MCX_CQ_ENTRY_OPCODE_REQ_ERR: case MCX_CQ_ENTRY_OPCODE_SEND_ERR: @@ -5882,6 +5993,8 @@ mcx_up(struct mcx_softc *sc) sc->sc_rx_prod = 0; mcx_rx_fill(sc); + mcx_calibrate_first(sc); /* takes the first sample and arms the sc_calibrate timeout */ + SET(ifp->if_flags, IFF_RUNNING); sc->sc_tx_cons = 0; @@ -5922,6 +6035,8 @@ mcx_down(struct mcx_softc *sc) intr_barrier(&sc->sc_ih); ifq_barrier(&ifp->if_snd); + timeout_del_barrier(&sc->sc_calibrate); /* cancel the calibration timeout */ + for (group = 0; group < MCX_NUM_FLOW_GROUPS; group++) { if (sc->sc_flow_group_id[group] !=
-1) mcx_destroy_flow_group(sc, @@ -6435,6 +6550,26 @@ static inline void mcx_bar(struct mcx_softc *sc, bus_size_t r, bus_size_t l, int f) { bus_space_barrier(sc->sc_memt, sc->sc_memh, r, l, f); +} + +static uint64_t +mcx_timer(struct mcx_softc *sc) +{ /* read the 64-bit timer from two 32-bit registers, retrying until the high half is the same before and after the low half is read */ + uint32_t hi, lo, ni; + + hi = mcx_rd(sc, MCX_INTERNAL_TIMER_H); /* NOTE(review): MCX_INTERNAL_TIMER_H and MCX_INTERNAL_TIMER_L are not defined anywhere in this diff */ + for (;;) { + lo = mcx_rd(sc, MCX_INTERNAL_TIMER_L); + mcx_bar(sc, MCX_INTERNAL_TIMER_L, 8, BUS_SPACE_BARRIER_READ); /* NOTE(review): the range starts at the L register with length 8, confirm it also covers the H register read next */ + ni = mcx_rd(sc, MCX_INTERNAL_TIMER_H); + + if (ni == hi) + break; /* high half did not change, so hi and lo belong together */ + + hi = ni; /* high half changed during the read, try again */ + } + + return (((uint64_t)hi << 32) | (uint64_t)lo); } static int