Index: sys/arch/amd64/amd64/conf.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/conf.c,v retrieving revision 1.74 diff -u -p -r1.74 conf.c --- sys/arch/amd64/amd64/conf.c 11 Nov 2021 10:03:08 -0000 1.74 +++ sys/arch/amd64/amd64/conf.c 6 Jun 2022 06:05:19 -0000 @@ -136,6 +136,7 @@ cdev_decl(cy); #include "bktr.h" #include "ksyms.h" #include "kstat.h" +#include "llt.h" #include "usb.h" #include "uhid.h" #include "fido.h" @@ -212,7 +213,8 @@ struct cdevsw cdevsw[] = cdev_notdef(), /* 28 was LKM */ cdev_notdef(), /* 29 */ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ - cdev_notdef(), /* 31 */ + cdev_lltrace_init(NLLT,lltrace), + /* 31: lltrace */ cdev_notdef(), /* 32 */ cdev_notdef(), /* 33 */ cdev_notdef(), /* 34 */ Index: sys/arch/amd64/amd64/intr.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/intr.c,v retrieving revision 1.55 diff -u -p -r1.55 intr.c --- sys/arch/amd64/amd64/intr.c 28 Dec 2020 14:23:30 -0000 1.55 +++ sys/arch/amd64/amd64/intr.c 6 Jun 2022 06:05:19 -0000 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -531,10 +532,16 @@ intr_handler(struct intrframe *frame, st if (need_lock) __mp_lock(&kernel_lock); #endif + floor = ci->ci_handled_intr_level; ci->ci_handled_intr_level = ih->ih_level; + + LLTRACE_CPU(ci, lltrace_irq, 0, ih->ih_slot); rc = (*ih->ih_fun)(ih->ih_arg ? ih->ih_arg : frame); + LLTRACE_CPU(ci, lltrace_irqret, 0, ih->ih_slot); + ci->ci_handled_intr_level = floor; + #ifdef MULTIPROCESSOR if (need_lock) __mp_unlock(&kernel_lock); Index: sys/arch/amd64/amd64/ipi.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/ipi.c,v retrieving revision 1.17 diff -u -p -r1.17 ipi.c --- sys/arch/amd64/amd64/ipi.c 21 Jan 2020 02:01:50 -0000 1.17 +++ sys/arch/amd64/amd64/ipi.c 6 Jun 2022 06:05:19 -0000 @@ -35,9 +35,10 @@ #include #include #include +#include +#include #include -#include #include #include #include @@ -45,6 +46,8 @@ void x86_send_ipi(struct cpu_info *ci, int ipimask) { + LLTRACE(lltrace_ipi, ci->ci_cpuid); + x86_atomic_setbits_u32(&ci->ci_ipis, ipimask); /* Don't send IPI to cpu which isn't (yet) running. */ @@ -57,6 +60,10 @@ x86_send_ipi(struct cpu_info *ci, int ip int x86_fast_ipi(struct cpu_info *ci, int ipi) { +#if 0 + LLTRACE(lltrace_ipi, ci->ci_cpuid); +#endif + if (!(ci->ci_flags & CPUF_RUNNING)) return (ENOENT); @@ -72,6 +79,8 @@ x86_broadcast_ipi(int ipimask) int count = 0; CPU_INFO_ITERATOR cii; + LLTRACE_CPU(self, lltrace_ipi, ~0); + CPU_INFO_FOREACH(cii, ci) { if (ci == self) continue; @@ -102,7 +111,10 @@ x86_ipi_handler(void) for (bit = 0; bit < X86_NIPI && pending; bit++) { if (pending & (1 << bit)) { pending &= ~(1 << bit); + + LLTRACE_CPU(ci, lltrace_irq, LLTRACE_IRQ_IPI, bit); (*ipifunc[bit])(ci); + LLTRACE_CPU(ci, lltrace_irqret, LLTRACE_IRQ_IPI, bit); ipi_count.ec_count++; } } Index: sys/arch/amd64/amd64/softintr.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/softintr.c,v retrieving revision 1.10 diff -u -p -r1.10 softintr.c --- sys/arch/amd64/amd64/softintr.c 11 Sep 2020 09:27:09 -0000 1.10 +++ sys/arch/amd64/amd64/softintr.c 6 Jun 2022 06:05:19 -0000 @@ -34,9 +34,12 @@ * Generic soft interrupt implementation for NetBSD/x86. 
*/ +#include "llt.h" + #include #include #include +#include #include @@ -99,8 +102,9 @@ softintr_dispatch(int which) uvmexp.softs++; mtx_leave(&si->softintr_lock); - + //LLTRACE_CPU(ci, lltrace_irq, LLTRACE_IRQ_BOTTOM_HALF, which); (*sih->sih_fn)(sih->sih_arg); + //LLTRACE_CPU(ci, lltrace_irqret, LLTRACE_IRQ_BOTTOM_HALF, which); } KERNEL_UNLOCK(); Index: sys/arch/amd64/include/cpu.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v retrieving revision 1.142 diff -u -p -r1.142 cpu.h --- sys/arch/amd64/include/cpu.h 26 Apr 2022 08:35:30 -0000 1.142 +++ sys/arch/amd64/include/cpu.h 6 Jun 2022 06:05:19 -0000 @@ -108,7 +108,6 @@ struct cpu_info { struct schedstate_percpu ci_schedstate; /* scheduler state */ struct cpu_info *ci_next; - struct proc *ci_curproc; u_int ci_cpuid; u_int ci_apicid; u_int ci_acpi_proc_id; @@ -119,9 +118,10 @@ struct cpu_info { u_int64_t ci_user_cr3; /* U-K page table */ /* bits for mitigating Micro-architectural Data Sampling */ - char ci_mds_tmp[32]; /* 32byte aligned */ + char ci_mds_tmp[32]; /* 32byte aligned */ void *ci_mds_buf; + struct proc *ci_curproc; struct pmap *ci_proc_pmap; /* last userspace pmap */ struct pcb *ci_curpcb; struct pcb *ci_idle_pcb; Index: sys/arch/arm64/arm64/conf.c =================================================================== RCS file: /cvs/src/sys/arch/arm64/arm64/conf.c,v retrieving revision 1.19 diff -u -p -r1.19 conf.c --- sys/arch/arm64/arm64/conf.c 11 Nov 2021 10:03:08 -0000 1.19 +++ sys/arch/arm64/arm64/conf.c 6 Jun 2022 06:05:19 -0000 @@ -91,6 +91,7 @@ cdev_decl(lpt); #include "bktr.h" #include "ksyms.h" #include "kstat.h" +#include "llt.h" #include "usb.h" #include "uhid.h" #include "fido.h" @@ -156,7 +157,8 @@ struct cdevsw cdevsw[] = cdev_notdef(), /* 28 was LKM */ cdev_notdef(), /* 29 */ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ - cdev_notdef(), /* 31 */ + cdev_lltrace_init(NLLT,lltrace), + /* 31: lltrace */ cdev_notdef(), /* 32 */ cdev_notdef(), /* 33 */ cdev_notdef(), /* 34 */ Index: sys/arch/arm64/conf/GENERIC =================================================================== RCS file: /cvs/src/sys/arch/arm64/conf/GENERIC,v retrieving revision 1.229 diff -u -p -r1.229 GENERIC --- sys/arch/arm64/conf/GENERIC 2 Jun 2022 03:09:39 -0000 1.229 +++ sys/arch/arm64/conf/GENERIC 6 Jun 2022 06:05:19 -0000 @@ -259,6 +259,7 @@ mvrtc* at fdt? mvspi* at fdt? moxtet* at spi? mvsw* at fdt? +eport* at mvsw? mvtemp* at fdt? mvuart* at fdt? sfp* at fdt? Index: sys/arch/arm64/dev/agintc.c =================================================================== RCS file: /cvs/src/sys/arch/arm64/dev/agintc.c,v retrieving revision 1.36 diff -u -p -r1.36 agintc.c --- sys/arch/arm64/dev/agintc.c 2 Jan 2022 20:00:21 -0000 1.36 +++ sys/arch/arm64/dev/agintc.c 6 Jun 2022 06:05:19 -0000 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -940,7 +941,11 @@ agintc_run_handler(struct intrhand *ih, else arg = frame; + LLTRACE(lltrace_irq, ih->ih_ipl == IPL_IPI ? LLTRACE_IRQ_IPI : 0, + ih->ih_irq); handled = ih->ih_func(arg); + LLTRACE(lltrace_irqret, ih->ih_ipl == IPL_IPI ? 
LLTRACE_IRQ_IPI : 0, + ih->ih_irq); if (handled) ih->ih_count.ec_count++; @@ -1197,6 +1202,8 @@ agintc_send_ipi(struct cpu_info *ci, int { struct agintc_softc *sc = agintc_sc; uint64_t sendmask; + + LLTRACE(lltrace_ipi, ci->ci_cpuid); if (ci == curcpu() && id == ARM_IPI_NOP) return; Index: sys/conf/GENERIC =================================================================== RCS file: /cvs/src/sys/conf/GENERIC,v retrieving revision 1.284 diff -u -p -r1.284 GENERIC --- sys/conf/GENERIC 19 Apr 2022 01:32:06 -0000 1.284 +++ sys/conf/GENERIC 6 Jun 2022 06:05:21 -0000 @@ -82,6 +82,7 @@ pseudo-device endrun 1 # EndRun line dis pseudo-device vnd 4 # vnode disk devices pseudo-device ksyms 1 # kernel symbols device pseudo-device kstat # kernel statistics device +pseudo-device llt # low-level tracing device # clonable devices pseudo-device bpfilter # packet filter Index: sys/conf/files =================================================================== RCS file: /cvs/src/sys/conf/files,v retrieving revision 1.714 diff -u -p -r1.714 files --- sys/conf/files 19 Mar 2022 10:25:09 -0000 1.714 +++ sys/conf/files 6 Jun 2022 06:05:21 -0000 @@ -573,6 +573,9 @@ file dev/ksyms.c ksyms needs-flag pseudo-device kstat file dev/kstat.c kstat needs-flag +pseudo-device llt +file dev/lltrace.c llt needs-flag + pseudo-device fuse file miscfs/fuse/fuse_device.c fuse needs-flag file miscfs/fuse/fuse_file.c fuse Index: sys/dev/lltrace.c =================================================================== RCS file: sys/dev/lltrace.c diff -N sys/dev/lltrace.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/dev/lltrace.c 6 Jun 2022 06:05:21 -0000 @@ -0,0 +1,953 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2022 The University of Queensland + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * This code was written by David Gwynne as part + * of the Information Technology Infrastructure Group (ITIG) in the + * Faculty of Engineering, Architecture and Information Technology + * (EAIT). + * + * It was heavily inspired by and aims to be largely compatible + * with the KUTrace (kernel/userland tracing) framework by Richard + * L. Sites. 
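+ *
+ * Each CPU logs fixed size 64bit slots (an event plus a packed
+ * cycle counter timestamp) into a per-CPU lltrace_buffer taken
+ * from a shared pool of buffers; full buffers are swapped back
+ * into the pool, and read(2) hands completed buffers to userland
+ * once tracing has been stopped.
+ *
+ * A userland consumer is expected to look roughly like the
+ * following sketch (the /dev/lltrace node name is an assumption;
+ * the ioctls are the ones implemented by lltraceioctl() below):
+ *
+ *	fd = open("/dev/lltrace", O_RDWR);
+ *	ioctl(fd, LLTIOCSTART);
+ *	... run the workload being traced ...
+ *	ioctl(fd, LLTIOCSTOP);
+ *	while (read(fd, &buf, sizeof(buf)) > 0)
+ *		... append buf (a struct lltrace_buffer) to a file ...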
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+
+#if defined(__amd64__) || defined(__i386__)
+
+static inline unsigned int
+lltrace_cas(unsigned int *p, unsigned int e, unsigned int n)
+{
+ __asm volatile("cmpxchgl %2, %1"
+ : "=a" (e), "=m" (*p)
+ : "r" (n), "a" (e), "m" (*p));
+
+ return (e);
+}
+
+static inline uint64_t
+lltrace_ts(void)
+{
+ unsigned int hi, lo;
+
+ __asm volatile("lfence; rdtsc" : "=d" (hi), "=a" (lo));
+
+ return (lo >> 6);
+}
+
+static inline uint64_t
+lltrace_ts_long(void)
+{
+ return (rdtsc_lfence() >> 6);
+}
+
+#elif defined(__aarch64__)
+
+#define lltrace_cas(_p, _e, _n) atomic_cas_uint((_p), (_e), (_n))
+
+static inline uint64_t
+lltrace_ts_long(void)
+{
+ uint64_t ts;
+
+ __asm volatile("mrs %x0, cntvct_el0" : "=r" (ts));
+
+ return (ts);
+}
+
+#define lltrace_ts() lltrace_ts_long()
+
+#else /* not x86 or arm64 */
+
+static unsigned int
+lltrace_cas(unsigned int *p, unsigned int e, unsigned int n)
+{
+ unsigned int o;
+ int s;
+
+ s = intr_disable();
+ o = *p;
+ if (o == e)
+ *p = n;
+ intr_restore(s);
+
+ return (o);
+}
+
+static inline uint64_t
+lltrace_ts(void)
+{
+ return (countertime());
+}
+
+static inline uint64_t
+lltrace_ts_long(void)
+{
+ return (countertime());
+}
+
+#endif
+
+#define LLTRACE_MB2NBUF(_mb) \
+ (((_mb) * (1U << 20)) / sizeof(struct lltrace_buffer))
+#define LLTRACE_NBUF2MB(_nbuf) \
+ (((_nbuf) * sizeof(struct lltrace_buffer)) / (1U << 20))
+
+#define LLTRACE_BLEN_DEFAULT 16
+
+struct lltrace_cpu {
+ SIMPLEQ_ENTRY(lltrace_cpu)
+ llt_entry;
+ struct lltrace_buffer llt_buffer;
+ unsigned int llt_slot;
+ pid_t llt_tid;
+};
+
+SIMPLEQ_HEAD(lltrace_cpu_list, lltrace_cpu);
+
+struct lltrace_softc {
+ unsigned int sc_running;
+ unsigned int sc_mode;
+ struct rwlock sc_lock;
+ unsigned int sc_nbuffers;
+
+ unsigned int sc_free;
+ unsigned int sc_used;
+ struct lltrace_cpu **sc_ring;
+ struct lltrace_cpu *sc_buffers;
+
+ unsigned int sc_read;
+ unsigned int sc_reading;
+ struct selinfo sc_sel;
+
+ uint64_t sc_boottime;
+ uint64_t sc_monotime;
+};
+
+static int lltrace_start(struct lltrace_softc *, struct proc *);
+static int lltrace_stop(struct lltrace_softc *, struct proc *);
+static int lltrace_flush(struct lltrace_softc *);
+
+static struct lltrace_softc *lltrace_sc;
+
+static void lltrace_arg32(struct lltrace_cpu *, uint64_t, unsigned int);
+
+int
+lltattach(int num)
+{
+ return (0);
+}
+
+int
+lltraceopen(dev_t dev, int flag, int mode, struct proc *p)
+{
+ struct lltrace_softc *sc;
+ int error;
+
+ if (minor(dev) != 0)
+ return (ENXIO);
+
+ error = suser(p);
+ if (error != 0)
+ return (error);
+
+ if (lltrace_sc != NULL)
+ return (EBUSY);
+
+ sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
+ if (sc == NULL)
+ return (ENOMEM);
+
+ sc->sc_running = 0;
+ sc->sc_nbuffers = LLTRACE_MB2NBUF(LLTRACE_BLEN_DEFAULT);
+
+ rw_init(&sc->sc_lock, "lltlk");
+
+ sc->sc_read = 0;
+ sc->sc_reading = 0;
+ klist_init_rwlock(&sc->sc_sel.si_note, &sc->sc_lock);
+
+ /* commit */
+ if (atomic_cas_ptr(&lltrace_sc, NULL, sc) != NULL) {
+ free(sc, M_DEVBUF, sizeof(*sc));
+ return (EBUSY);
+ }
+
+ return (0);
+}
+
+int
+lltraceclose(dev_t dev, int flag, int mode, struct proc *p)
+{
+ struct lltrace_softc *sc = lltrace_sc;
+
+ rw_enter_write(&sc->sc_lock);
+ lltrace_stop(sc, p);
+ lltrace_flush(sc);
+ rw_exit_write(&sc->sc_lock);
+
+ lltrace_sc = NULL;
+ membar_sync();
+
+ free(sc, M_DEVBUF, sizeof(*sc));
+
+ return (0);
+}
+
+static int
+lltrace_fionread(struct
lltrace_softc *sc) +{ + int canread; + + rw_enter_read(&sc->sc_lock); + canread = !sc->sc_running && (sc->sc_buffers != NULL) && + (sc->sc_read < sc->sc_nbuffers); + rw_exit_read(&sc->sc_lock); + + return (canread ? sizeof(struct lltrace_buffer) : 0); +} + +static void +lltrace_cpu_init(struct lltrace_cpu *llt, struct lltrace_softc *sc, + struct cpu_info *ci, pid_t tid) +{ + struct lltrace_header *llh; + + llh = (struct lltrace_header *)&llt->llt_buffer; + llh->h_cpu = cpu_number(); + llh->h_boottime = sc->sc_boottime; + llh->h_start_cy = lltrace_ts_long(); + llh->h_start_ns = nsecuptime() - sc->sc_monotime; + llh->h_end_cy = 0; + llh->h_end_ns = 0; + llh->h_idletid = ci->ci_schedstate.spc_idleproc->p_tid; + llh->h_tid = tid; + llh->h_zero = 0; + + llt->llt_tid = tid; + llt->llt_slot = 8; +} + +static void +lltrace_cpu_fini(struct lltrace_cpu *llt, struct lltrace_softc *sc) +{ + struct lltrace_header *llh; + + llh = (struct lltrace_header *)&llt->llt_buffer; + llh->h_end_cy = lltrace_ts_long(); + llh->h_end_ns = nsecuptime() - sc->sc_monotime; +} + +static int +lltrace_set_mode(struct lltrace_softc *sc, unsigned int mode) +{ + int error; + + if (mode >= LLTRACE_MODE_COUNT) + return (EINVAL); + + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + return (error); + + if (sc->sc_running) + error = EBUSY; + else + sc->sc_mode = mode; + + rw_exit(&sc->sc_lock); + return (error); +} + +static int +lltrace_set_blen(struct lltrace_softc *sc, unsigned int blen) +{ + int error; + unsigned int nbuffers; + + if (blen < LLTRACE_BLEN_MIN || blen > LLTRACE_BLEN_MAX) + return (EINVAL); + + /* convert megabytes to the number of buffers */ + nbuffers = LLTRACE_MB2NBUF(blen); + if (nbuffers <= ncpus) + EINVAL; + + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + return (error); + + if (sc->sc_buffers != NULL) + error = EBUSY; + else { + sc->sc_nbuffers = nbuffers; + printf("%s[%u]: nbuffers %u\n", __func__, __LINE__, sc->sc_nbuffers); +} + + rw_exit(&sc->sc_lock); + return (error); +} + +static int +lltrace_start(struct lltrace_softc *sc, struct proc *p) +{ + struct bintime boottime; + unsigned int i; + size_t sz; + struct lltrace_cpu_list l = SIMPLEQ_HEAD_INITIALIZER(l); + struct lltrace_cpu *llt; + struct cpu_info *ci; + CPU_INFO_ITERATOR cii; + + if (sc->sc_running) + return EINVAL; + + if (sc->sc_nbuffers <= (ncpus * 2 + 1)) + return (EINVAL); + + lltrace_flush(sc); + + sc->sc_monotime = nsecuptime(); + + binboottime(&boottime); + sc->sc_boottime = BINTIME_TO_NSEC(&boottime) + sc->sc_monotime; + + sz = roundup(sc->sc_nbuffers * sizeof(*sc->sc_buffers), PAGE_SIZE); + sc->sc_buffers = km_alloc(sz, &kv_any, &kp_dirty, &kd_waitok); + if (sc->sc_buffers == NULL) + return (ENOMEM); + sc->sc_ring = mallocarray(sc->sc_nbuffers, sizeof(*sc->sc_ring), + M_DEVBUF, M_WAITOK); + for (i = 0; i < sc->sc_nbuffers; i++) { + llt = &sc->sc_buffers[i]; + llt->llt_slot = 0; + sc->sc_ring[i] = llt; + } + + sc->sc_free = 0; /* next slot to pull a free buffer from */ + sc->sc_used = 0; /* next slot to put a used buffer in */ + + CPU_INFO_FOREACH(cii, ci) { + i = sc->sc_free++; /* can't wrap yet */ + + llt = sc->sc_ring[i]; + sc->sc_ring[i] = NULL; + + SIMPLEQ_INSERT_HEAD(&l, llt, llt_entry); + } + + CPU_INFO_FOREACH(cii, ci) { + sched_peg_curproc(ci); + + llt = SIMPLEQ_FIRST(&l); + SIMPLEQ_REMOVE_HEAD(&l, llt_entry); + + lltrace_cpu_init(llt, sc, ci, p->p_tid); + lltrace_pidname(llt, p); + + membar_producer(); + ci->ci_schedstate.spc_lltrace = llt; + } + atomic_clearbits_int(&p->p_flag, 
P_CPUPEG); + + sc->sc_running = 1; + + return (0); +} + +static int +lltrace_stop(struct lltrace_softc *sc, struct proc *p) +{ + struct lltrace_cpu *llt; + struct cpu_info *ci; + CPU_INFO_ITERATOR cii; + unsigned long s; + + if (!sc->sc_running) + return (EALREADY); + + sc->sc_running = 0; + + /* visit each cpu to take llt away safely */ + CPU_INFO_FOREACH(cii, ci) { + sched_peg_curproc(ci); + + s = intr_disable(); + llt = ci->ci_schedstate.spc_lltrace; + ci->ci_schedstate.spc_lltrace = NULL; + intr_restore(s); + + lltrace_cpu_fini(llt, sc); + } + atomic_clearbits_int(&p->p_flag, P_CPUPEG); + + return (0); +} + +static int +lltrace_flush(struct lltrace_softc *sc) +{ + size_t sz; + + rw_assert_wrlock(&sc->sc_lock); + if (sc->sc_running) + return (EBUSY); + + if (sc->sc_buffers == NULL) + return (0); + + sz = roundup(sc->sc_nbuffers * sizeof(*sc->sc_buffers), PAGE_SIZE); + km_free(sc->sc_buffers, sz, &kv_any, &kp_dirty); + free(sc->sc_ring, M_DEVBUF, sc->sc_nbuffers * sizeof(*sc->sc_ring)); + + sc->sc_buffers = NULL; + sc->sc_ring = NULL; + sc->sc_read = 0; + + return (0); +} + +int +lltraceioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) +{ + struct lltrace_softc *sc = lltrace_sc; + int error = 0; + + KERNEL_UNLOCK(); + + switch (cmd) { + case FIONREAD: + *(int *)data = lltrace_fionread(sc); + break; + case FIONBIO: + /* vfs tracks this for us if we let it */ + break; + + case LLTIOCSTART: + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + break; + error = lltrace_start(sc, p); + rw_exit(&sc->sc_lock); + break; + case LLTIOCSTOP: + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + break; + error = lltrace_stop(sc, p); + rw_exit(&sc->sc_lock); + break; + case LLTIOCFLUSH: + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + break; + error = lltrace_flush(sc); + rw_exit(&sc->sc_lock); + break; + + case LLTIOCSBLEN: + error = lltrace_set_blen(sc, *(unsigned int *)data); + break; + case LLTIOCGBLEN: + *(unsigned int *)data = LLTRACE_NBUF2MB(sc->sc_nbuffers); + break; + + case LLTIOCSMODE: + error = lltrace_set_mode(sc, *(unsigned int *)data); + break; + case LLTIOCGMODE: + *(unsigned int *)data = sc->sc_mode; + break; + + default: + error = ENOTTY; + break; + } + + KERNEL_LOCK(); + + return (error); +} + +int +lltraceread(dev_t dev, struct uio *uio, int ioflag) +{ + struct lltrace_softc *sc = lltrace_sc; + struct lltrace_cpu *llt; + unsigned int slot; + int error; + + KERNEL_UNLOCK(); + + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + goto lock; + + if (sc->sc_running) { + if (ISSET(ioflag, IO_NDELAY)) { + error = EWOULDBLOCK; + goto unlock; + } + + do { + sc->sc_reading++; + error = rwsleep_nsec(&sc->sc_reading, &sc->sc_lock, + PRIBIO|PCATCH, "lltread", INFSLP); + sc->sc_reading--; + if (error != 0) + goto unlock; + } while (sc->sc_running); + } + + if (sc->sc_buffers == NULL) { + error = 0; + goto unlock; + } + + slot = sc->sc_read; + for (;;) { + if (slot >= sc->sc_nbuffers) { + error = 0; + goto unlock; + } + + llt = &sc->sc_buffers[slot]; + KASSERT(llt->llt_slot <= nitems(llt->llt_buffer.llt_slots)); + if (llt->llt_slot > 0) + break; + + slot++; + } + + error = uiomove(&llt->llt_buffer, + llt->llt_slot * sizeof(llt->llt_buffer.llt_slots[0]), uio); + if (error != 0) + goto unlock; + + sc->sc_read = slot + 1; + +unlock: + rw_exit(&sc->sc_lock); +lock: + KERNEL_LOCK(); + return (error); +} + +static void +lltrace_filt_detach(struct knote *kn) +{ + struct lltrace_softc *sc = kn->kn_hook; + + 
klist_remove(&sc->sc_sel.si_note, kn); +} + +static int +lltrace_filt_event(struct knote *kn, long hint) +{ + struct lltrace_softc *sc = kn->kn_hook; + int canread; + + canread = !sc->sc_running && (sc->sc_buffers != NULL) && + (sc->sc_read < sc->sc_nbuffers); + + kn->kn_data = canread ? sizeof(struct lltrace_buffer) : 0; + + return (canread); +} + +static int +lltrace_filt_modify(struct kevent *kev, struct knote *kn) +{ + struct lltrace_softc *sc = kn->kn_hook; + int active; + + rw_enter_write(&sc->sc_lock); + active = knote_modify_fn(kev, kn, lltrace_filt_event); + rw_exit_write(&sc->sc_lock); + + return (active); +} + +static int +lltrace_filt_process(struct knote *kn, struct kevent *kev) +{ + struct lltrace_softc *sc = kn->kn_hook; + int active; + + rw_enter_write(&sc->sc_lock); + active = knote_process_fn(kn, kev, lltrace_filt_event); + rw_exit_write(&sc->sc_lock); + + return (active); +} + +static const struct filterops lltrace_filtops = { + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, + .f_attach = NULL, + .f_detach = lltrace_filt_detach, + .f_event = lltrace_filt_event, + .f_modify = lltrace_filt_modify, + .f_process = lltrace_filt_process, +}; + +int +lltracekqfilter(dev_t dev, struct knote *kn) +{ + struct lltrace_softc *sc = lltrace_sc; + struct klist *klist; + + switch (kn->kn_filter) { + case EVFILT_READ: + klist = &sc->sc_sel.si_note; + kn->kn_fop = &lltrace_filtops; + break; + default: + return (EINVAL); + } + + kn->kn_hook = sc; + klist_insert(klist, kn); + + return (0); +} + +static struct lltrace_cpu * +lltrace_next(struct lltrace_cpu *llt) +{ + struct lltrace_softc *sc = lltrace_sc; + struct cpu_info *ci = curcpu(); + struct lltrace_cpu *nllt; + unsigned int slot, oslot, nslot; + + /* check if we were preempted */ + nllt = ci->ci_schedstate.spc_lltrace; + if (nllt != llt) { + /* something preempted us and swapped buffers already */ + return (nllt); + } + + slot = sc->sc_free; + for (;;) { + nslot = slot + 1; + if (nslot > sc->sc_nbuffers) { + if (sc->sc_mode == LLTRACE_MODE_HEAD) + return (NULL); + } + + oslot = atomic_cas_uint(&sc->sc_free, slot, nslot); + if (slot == oslot) + break; + + slot = oslot; + } + + slot %= sc->sc_nbuffers; + nllt = sc->sc_ring[slot]; + sc->sc_ring[slot] = NULL; + + slot = sc->sc_used; + for (;;) { + nslot = slot + 1; + + oslot = atomic_cas_uint(&sc->sc_used, slot, nslot); + if (slot == oslot) + break; + + slot = oslot; + } + + lltrace_cpu_init(nllt, sc, ci, llt->llt_tid); + lltrace_cpu_fini(llt, sc); + + slot %= sc->sc_nbuffers; + sc->sc_ring[slot] = llt; + + ci->ci_schedstate.spc_lltrace = nllt; + + return (nllt); +} + +static unsigned int +lltrace_insert(struct lltrace_cpu *llt, uint64_t record, + const uint64_t *extra, unsigned int n) +{ + unsigned int slot, oslot, nslot; + uint64_t *slots; + + n++; + record |= lltrace_ts() << LLTRACE_TIMESTAMP_SHIFT; + + slot = llt->llt_slot; + for (;;) { + nslot = slot + n; + if (nslot > nitems(llt->llt_buffer.llt_slots)) { + unsigned long s; + + s = intr_disable(); + llt = lltrace_next(llt); + intr_restore(s); + + if (llt == NULL) + return (1); + + slot = llt->llt_slot; + continue; + } + + oslot = lltrace_cas(&llt->llt_slot, slot, nslot); + if (slot == oslot) + break; + + slot = oslot; + } + + slots = llt->llt_buffer.llt_slots + slot; + *slots = record; + while (n > 1) { + *(++slots) = *(extra++); + n--; + } + + return (0); +} + +void +lltrace_statclock(struct lltrace_cpu *llt, int usermode, unsigned long pc) +{ + uint64_t event = usermode ? 
LLTRACE_EVENT_PC_U : LLTRACE_EVENT_PC_K; + uint64_t extra[1] = { pc }; + + lltrace_insert(llt, (event | nitems(extra)) << LLTRACE_EVENT_SHIFT, + extra, nitems(extra)); +} + +void +lltrace_syscall(struct lltrace_cpu *llt, register_t code, + size_t argsize, const register_t *args) +{ + uint64_t record = LLTRACE_EVENT_SYSCALL(code) << LLTRACE_EVENT_SHIFT; + + if (argsize > 0) { + uint64_t arg0 = args[0] & LLTRACE_ARG0_MASK; + record |= arg0 << LLTRACE_ARG0_SHIFT; + } + + lltrace_insert(llt, record, NULL, 0); +} + +void +lltrace_sysret(struct lltrace_cpu *llt, register_t code, + int error, const register_t retvals[2]) +{ + uint64_t record = LLTRACE_EVENT_SYSRET(code) << LLTRACE_EVENT_SHIFT; + uint64_t arg0 = error & LLTRACE_ARG0_MASK; + record |= arg0 << LLTRACE_ARG0_SHIFT; + unsigned int stop; + + stop = lltrace_insert(llt, record, NULL, 0); + + if (stop) { + struct lltrace_softc *sc = lltrace_sc; + + rw_enter_write(&sc->sc_lock); + if (sc->sc_running) + lltrace_stop(sc, curproc); + + KNOTE(&sc->sc_sel.si_note, 0); + if (sc->sc_reading) + wakeup(&sc->sc_reading); + rw_exit_write(&sc->sc_lock); + } +} + +void +lltrace_pidname(struct lltrace_cpu *llt, struct proc *p) +{ + uint64_t record; + uint64_t extra[3]; + unsigned int l, n; + pid_t tid = p->p_tid; + + if (ISSET(p->p_p->ps_flags, PS_SYSTEM)) + tid |= LLTRACE_EVENT_PID_ARG_KTHREAD; + + CTASSERT(sizeof(extra) == sizeof(p->p_p->ps_comm)); + + extra[0] = extra[1] = extra[2] = 0; /* memset */ + l = strlcpy((char *)extra, p->p_p->ps_comm, sizeof(extra)); + + /* turn the string length into the number of slots we need */ + n = howmany(l, sizeof(uint64_t)); + + record = (LLTRACE_EVENT_PID | n) << LLTRACE_EVENT_SHIFT; + record |= (tid & LLTRACE_ARG32_MASK) << LLTRACE_ARG32_SHIFT; + + llt->llt_tid = p->p_tid; + + lltrace_insert(llt, record, extra, n); +} + +void +lltrace_sched_enter(struct lltrace_cpu *llt) +{ + uint64_t record = LLTRACE_EVENT_SCHED << LLTRACE_EVENT_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} + +void +lltrace_sched_leave(struct lltrace_cpu *llt) +{ + uint64_t record = LLTRACE_EVENT_SCHEDRET << LLTRACE_EVENT_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} + +void +lltrace_idle(struct lltrace_cpu *llt) +{ + uint64_t record = LLTRACE_EVENT_IDLE << LLTRACE_EVENT_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} + +static void +lltrace_arg32(struct lltrace_cpu *llt, uint64_t event, unsigned int arg32) +{ + uint64_t record; + + record = event << LLTRACE_EVENT_SHIFT; + record |= (arg32 & LLTRACE_ARG32_MASK) << LLTRACE_ARG32_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} + +void +lltrace_runnable(struct lltrace_cpu *llt, struct proc *p) +{ + lltrace_arg32(llt, LLTRACE_EVENT_RUNNABLE, p->p_tid); +} + +void +lltrace_trap(struct lltrace_cpu *llt, unsigned int trap) +{ + lltrace_arg32(llt, LLTRACE_EVENT_TRAP, trap); +} + +void +lltrace_trapret(struct lltrace_cpu *llt, unsigned int trap) +{ + lltrace_arg32(llt, LLTRACE_EVENT_TRAPRET, trap); +} + +void +lltrace_ipi(struct lltrace_cpu *llt, unsigned int cpu) +{ + lltrace_arg32(llt, LLTRACE_EVENT_IPI, cpu); +} + +void +lltrace_irq(struct lltrace_cpu *llt, unsigned int type, unsigned int vec) +{ + lltrace_arg32(llt, LLTRACE_EVENT_IRQ(type), vec); +} + +void +lltrace_irqret(struct lltrace_cpu *llt, unsigned int type, unsigned int vec) +{ + lltrace_arg32(llt, LLTRACE_EVENT_IRQRET(type), vec); +} + +void +lltrace_lock(struct lltrace_cpu *llt, void *lock, unsigned int op) +{ + lltrace_arg32(llt, LLTRACE_EVENT_LOCK(op), (uint32_t)(intptr_t)lock); +} + +void +lltrace_klock(struct 
lltrace_cpu *llt, void *lock, unsigned int op) +{ +#if 0 + lltrace_arg32(llt, LLTRACE_EVENT_LOCK(op), (uint32_t)(intptr_t)lock); +#endif +} + +void +lltrace_pkts(struct lltrace_cpu *llt, unsigned int t, unsigned int v) +{ + t &= LLTRACE_PKTS_T_MASK; + + v <<= LLTRACE_PKTS_V_SHIFT; + v &= LLTRACE_PKTS_V_MASK; + + lltrace_arg32(llt, LLTRACE_EVENT_PKTS, t | v); +} + +void +lltrace_mark(struct lltrace_cpu *llt) +{ + uint64_t record = LLTRACE_EVENT_MARK << LLTRACE_EVENT_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} + +void +__cyg_profile_func_enter(void *fn, void *pc) +{ + struct lltrace_cpu *llt; + uint64_t record; + + llt = lltrace_enter(); + if (llt == NULL) + return; + + record = LLTRACE_EVENT_KFUNC_ENTER << LLTRACE_EVENT_SHIFT; + record |= ((uintptr_t)fn & LLTRACE_ARG32_MASK) << LLTRACE_ARG32_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} + +void +__cyg_profile_func_exit(void *fn, void *pc) +{ + struct lltrace_cpu *llt; + uint64_t record; + + llt = lltrace_enter(); + if (llt == NULL) + return; + + record = LLTRACE_EVENT_KFUNC_LEAVE << LLTRACE_EVENT_SHIFT; + record |= ((uintptr_t)fn & LLTRACE_ARG32_MASK) << LLTRACE_ARG32_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} Index: sys/dev/fdt/files.fdt =================================================================== RCS file: /cvs/src/sys/dev/fdt/files.fdt,v retrieving revision 1.162 diff -u -p -r1.162 files.fdt --- sys/dev/fdt/files.fdt 30 Jan 2022 21:40:50 -0000 1.162 +++ sys/dev/fdt/files.fdt 6 Jun 2022 06:05:21 -0000 @@ -462,9 +462,11 @@ device mvspi: spi attach mvspi at fdt file dev/fdt/mvspi.c mvspi -device mvsw +device mvsw {} attach mvsw at fdt -file dev/fdt/mvsw.c mvsw +device eport: ether, ifnet, mii, ifmedia +attach eport at mvsw +file dev/fdt/mvsw.c mvsw | eport device mvtemp attach mvtemp at fdt Index: sys/dev/fdt/if_mvneta.c =================================================================== RCS file: /cvs/src/sys/dev/fdt/if_mvneta.c,v retrieving revision 1.26 diff -u -p -r1.26 if_mvneta.c --- sys/dev/fdt/if_mvneta.c 5 Jun 2022 02:54:18 -0000 1.26 +++ sys/dev/fdt/if_mvneta.c 6 Jun 2022 06:05:21 -0000 @@ -175,6 +175,8 @@ struct mvneta_softc { int sc_sfp; int sc_node; + struct if_device sc_ifd; + #if NKSTAT > 0 struct mutex sc_kstat_lock; struct timeout sc_kstat_tick; @@ -792,6 +794,10 @@ mvneta_attach_deferred(struct device *se */ if_attach(ifp); ether_ifattach(ifp); + + sc->sc_ifd.if_node = sc->sc_node; + sc->sc_ifd.if_ifp = ifp; + if_register(&sc->sc_ifd); #if NKSTAT > 0 mvneta_kstat_attach(sc); Index: sys/dev/fdt/mvsw.c =================================================================== RCS file: /cvs/src/sys/dev/fdt/mvsw.c,v retrieving revision 1.5 diff -u -p -r1.5 mvsw.c --- sys/dev/fdt/mvsw.c 6 Apr 2022 18:59:28 -0000 1.5 +++ sys/dev/fdt/mvsw.c 6 Jun 2022 06:05:21 -0000 @@ -15,10 +15,19 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include "bpfilter.h" + #include #include #include +#include +#include +#include +#include +#include +#include #include +#include #include #include @@ -27,7 +36,31 @@ #include #include +#include +#include +#include + #include +#include + +#include +#include +#include + +#include +#include + +#include /* if_trunk.h uses siphash bits */ +#include + +#if NBPFILTER > 0 +#include +#endif + +#define MVSW_MAX_PORTS 11 + +#define ETHERTYPE_MVSW_ETAG ETHERTYPE_802_EX1 +#define ETHERTYPE_MVSW_DEFAULT 0x9000 /* who cares? 
*/ /* Registers */ @@ -49,34 +82,304 @@ #define MVSW_SMI_TIMEOUT 1600 /* Switch registers */ -#define MVSW_PORT(x) (0x10 + (x)) -#define MVSW_G2 0x1c +#define MVSW_PORT(x) (0x10 + (x)) /* Port */ +#define MVSW_G1 0x1b /* Global1 */ +#define MVSW_G2 0x1c /* Global2 */ +/* + * Port registers */ #define MVSW_PORT_SWITCHID 0x03 #define MVSW_PORT_SWITCHID_PROD_MASK 0xfff0 #define MVSW_PORT_SWITCHID_PROD_88E6141 0x3400 #define MVSW_PORT_SWITCHID_PROD_88E6341 0x3410 #define MVSW_PORT_SWITCHID_REV_MASK 0x000f -#define MVSW_PORT_CTRL 0x04 -#define MVSW_PORT_CTRL_STATE_MASK 0x0003 -#define MVSW_PORT_CTRL_STATE_FORWARD 0x0003 +#define MVSW_PORT_CTRL0 0x04 +#define MVSW_PORT_CTRL0_STATE_MASK (0x3 << 0) +#define MVSW_PORT_CTRL0_STATE_DISABLED (0x0 << 0) +#define MVSW_PORT_CTRL0_STATE_BLOCKING (0x1 << 0) +#define MVSW_PORT_CTRL0_STATE_LEARNING (0x2 << 0) +#define MVSW_PORT_CTRL0_STATE_FORWARD (0x3 << 0) +#define MVSW_PORT_CTRL0_EGRESS_FLOOD_MCAST (0x1 << 2) +#define MVSW_PORT_CTRL0_EGRESS_FLOOD_UCAST (0x1 << 3) +#define MVSW_PORT_CTRL0_TAG_IF_BOTH (0x1 << 6) +#define MVSW_PORT_CTRL0_VLAN_TUNNEL (0x1 << 7) +#define MVSW_PORT_CTRL0_FRAME_MODE_MASK (0x3 << 8) +#define MVSW_PORT_CTRL0_FRAME_MODE_NORMAL (0x0 << 8) +#define MVSW_PORT_CTRL0_FRAME_MODE_TAG (0x1 << 8) +#define MVSW_PORT_CTRL0_FRAME_MODE_PROVIDER (0x2 << 8) +#define MVSW_PORT_CTRL0_FRAME_MODE_ETAG (0x3 << 8) +#define MVSW_PORT_CTRL0_IGMP_MLD_SNOOP (0x1 << 10) +#define MVSW_PORT_CTRL0_HEADER (0x1 << 11) +#define MVSW_PORT_CTRL0_EGRESS_MODE_MASK (0x3 << 12) +#define MVSW_PORT_CTRL0_EGRESS_MODE_UNMODIFIED (0x0 << 12) +#define MVSW_PORT_CTRL0_EGRESS_MODE_UNTAGGED (0x1 << 12) +#define MVSW_PORT_CTRL0_EGRESS_MODE_TAGGED (0x2 << 12) +#define MVSW_PORT_CTRL0_EGRESS_MODE_ETAG (0x3 << 12) +#define MVSW_PORT_CTRL0_SAFILTER_MASK (0x3 << 14) +#define MVSW_PORT_CTRL0_SAFILTER_DROP_ON_LOCK (0x1 << 14) +#define MVSW_PORT_CTRL0_SAFILTER_DROP_ON_UNLOCK (0x2 << 14) +#define MVSW_PORT_CTRL0_SAFILTER_DROP_TO_CPU (0x3 << 14) + +#define MVSW_PORT_CTRL1 0x05 +#define MVSW_PORT_CTRL1_FID_HI_SHIFT 0 +#define MVSW_PORT_CTRL1_FID_HI_MASK 0xff +#define MVSW_PORT_CTRL1_TRUNK_ID_SHIFT 8 +#define MVSW_PORT_CTRL1_TRUNK_ID_MASK 0x0f +#define MVSW_PORT_CTRL1_TRUNK_PORT (0x1 << 14) +#define MVSW_PORT_CTRL1_MESSAGE_PORT (0x1 << 15) + +#define MVSW_PORT_BASED_VLAN 0x06 +#define MVSW_PORT_BASED_VLAN_FID_LO_SHIFT 0 +#define MVSW_PORT_BASED_VLAN_FID_LO_MASK 0 + +/* Default Port VLAN */ +#define MVSW_PORT_DEFAULT_VLAN 0x07 +#define MVSW_PORT_DEVAULT_VLAN_VID_SHIFT 0 +#define MVSW_PORT_DEVAULT_VLAN_VID_MASK 0xfff + +/* Port Control 2 */ +#define MVSW_PORT_CTRL2 0x08 +#define MVSW_PORT_CTRL2_JUMBO_MODE_MASK (0x3 << 12) +#define MVSW_PORT_CTRL2_JUMBO_MODE_1522 (0x0 << 12) +#define MVSW_PORT_CTRL2_JUMBO_MODE_2048 (0x1 << 12) +#define MVSW_PORT_CTRL2_JUMBO_MODE_10240 (0x2 << 12) +#define MVSW_PORT_CTRL2_8021Q_MODE_MASK (0x3 << 10) +#define MVSW_PORT_CTRL2_8021Q_MODE_DISABLED (0x0 << 10) +#define MVSW_PORT_CTRL2_8021Q_MODE_FALLBACK (0x1 << 10) +#define MVSW_PORT_CTRL2_8021Q_MODE_CHECK (0x2 << 10) +#define MVSW_PORT_CTRL2_8021Q_MODE_SECURE (0x3 << 10) +#define MVSW_PORT_CTRL2_DISCARD_TAGGED (0x1 << 9) +#define MVSW_PORT_CTRL2_DISCARD_UNTAGGED (0x1 << 8) +#define MVSW_PORT_CTRL2_MAP_DA (0x1 << 7) + +/* Port Association Vector */ +#define MVSW_PORT_ASSOC_VECTOR 0x0b +#define MVSW_PORT_ASSOC_VECTOR_HOLD_AT_1 (0x1 << 15) +#define MVSW_PORT_ASSOC_VECTOR_INT_AGE_OUT (0x1 << 14) +#define MVSW_PORT_ASSOC_VECTOR_LOCKED_PORT (0x1 << 13) +#define MVSW_PORT_ASSOC_VECTOR_IGNORE_WRONG (0x1 << 12) +#define 
MVSW_PORT_ASSOC_VECTOR_REFRESH_LOCKED (0x1 << 11) +/* i think low bits are a bitmap of relevant ports */ + +#define MVSW_PORT_ETH_TYPE 0x0f + +/* + * Global1 registers + */ + +/* ATU FID */ +#define MVSW_G1_ATU_FID 0x01 + +#define MVSW_G1_VTU_OP 0x05 +#define MVSW_G1_VTU_OP_BUSY (0x1 << 15) +#define MVSW_G1_VTU_OP_MASK (0x7 << 12) +#define MVSW_G1_VTU_OP_FLUSH_ALL (0x1 << 12) +#define MVSW_G1_VTU_OP_NOOP (0x2 << 12) +#define MVSW_G1_VTU_OP_VTU_LOAD_PURGE (0x3 << 12) +#define MVSW_G1_VTU_OP_VTU_GET_NEXT (0x4 << 12) +#define MVSW_G1_VTU_OP_STU_LOAD_PURGE (0x5 << 12) +#define MVSW_G1_VTU_OP_STU_GET_NEXT (0x6 << 12) +#define MVSW_G1_VTU_OP_GET_CLR_VIOLATION (0x7 << 12) +#define MVSW_G1_VTU_OP_MEMBER_VIOLATION (0x1 << 6) +#define MVSW_G1_VTU_OP_MISS_VIOLATION (0x1 << 5) +#define MVSW_G1_VTU_OP_SPID_MASK (0xf << 0) + +/* ATU Control */ +#define MVSW_G1_ATU_CTRL 0x0a +#define MVSW_G1_ATU_CTRL_LEARN2ALL (0x1 << 3) + +/* ATU Operation */ +#define MVSW_G1_ATU_OP 0x0a +#define MVSW_G1_ATU_OP_BUSY (0x1 << 15) +#define MVSW_G1_ATU_OP_MASK (0x7 << 12) +#define MVSW_G1_ATU_OP_NOOP (0x0 << 12) +#define MVSW_G1_ATU_OP_FLUSH_MOVE_ALL (0x1 << 12) +#define MVSW_G1_ATU_OP_FLUSH_MOVE_NON_STATIC (0x2 << 12) +#define MVSW_G1_ATU_OP_LOAD_DB (0x3 << 12) +#define MVSW_G1_ATU_OP_GET_NEXT_DB (0x4 << 12) +#define MVSW_G1_ATU_OP_FLUSH_MOVE_ALL_DB (0x5 << 12) +#define MVSW_G1_ATU_OP_FLUSH_MOVE_NON_STATIC_DB (0x6 << 12) +#define MVSW_G1_ATU_OP_GET_CLR_VIOLATION (0x7 << 12) +#define MVSW_G1_ATU_OP_AGE_OUT_VIOLATION (0x1 << 7) +#define MVSW_G1_ATU_OP_MEMBER_VIOLATION (0x1 << 6) +#define MVSW_G1_ATU_OP_MISS_VIOLATION (0x1 << 5) +#define MVSW_G1_ATU_OP_FULL_VIOLATION (0x1 << 4) + +/* ATU Data */ +#define MVSW_G1_ATU_DATA 0x0c +#define MVSW_G1_ATU_DATA_TRUNK (0x1 << 15) +#define MVSW_G1_ATU_DATA_TRUNK_ID_MASK (0xf << 4) +#define MVSW_G1_ATU_DATA_PORT_VECTOR_MASK (0x3ff << 4) +#define MVSW_G1_ATU_DATA_STATE_MASK (0xf << 0) +#define MVSW_G1_ATU_DATA_STATE_UC_UNUSED (0x0 << 0) +#define MVSW_G1_ATU_DATA_STATE_UC_AGE_1_OLDEST (0x1 << 0) +#define MVSW_G1_ATU_DATA_STATE_UC_AGE_2 (0x2 << 0) +#define MVSW_G1_ATU_DATA_STATE_UC_AGE_3 (0x3 << 0) +#define MVSW_G1_ATU_DATA_STATE_UC_AGE_4 (0x4 << 0) +#define MVSW_G1_ATU_DATA_STATE_UC_AGE_5 (0x5 << 0) +#define MVSW_G1_ATU_DATA_STATE_UC_AGE_6 (0x6 << 0) +#define MVSW_G1_ATU_DATA_STATE_UC_AGE_7_NEWEST (0x7 << 0) +#define MVSW_G1_ATU_DATA_STATE_UC_STATIC_POLICY (0x8 << 0) +#define MVSW_G1_ATU_DATA_STATE_UC_STATIC_POLICY_PO (0x9 << 0) +#define MVSW_G1_ATU_DATA_STATE_UC_STATIC_AVB_NRL (0xa << 0) +#define MVSW_G1_ATU_DATA_STATE_UC_STATIC_AVB_NRL_PO (0xb << 0) +#define MVSW_G1_ATU_DATA_STATE_UC_STATIC_DA_MGMT (0xc << 0) +#define MVSW_G1_ATU_DATA_STATE_UC_STATIC_DA_MGMT_PO (0xd << 0) +#define MVSW_G1_ATU_DATA_STATE_UC_STATIC (0xe << 0) +#define MVSW_G1_ATU_DATA_STATE_UC_STATIC_PO (0xf << 0) +#define MVSW_G1_ATU_DATA_STATE_MC_UNUSED (0x0 << 0) +#define MVSW_G1_ATU_DATA_STATE_MC_STATIC_POLICY (0x4 << 0) +#define MVSW_G1_ATU_DATA_STATE_MC_STATIC_AVB_NRL (0x5 << 0) +#define MVSW_G1_ATU_DATA_STATE_MC_STATIC_DA_MGMT (0x6 << 0) +#define MVSW_G1_ATU_DATA_STATE_MC_STATIC (0x7 << 0) +#define MVSW_G1_ATU_DATA_STATE_MC_STATIC_POLICY_PO (0xc << 0) +#define MVSW_G1_ATU_DATA_STATE_MC_STATIC_AVB_NRL_PO (0xd << 0) +#define MVSW_G1_ATU_DATA_STATE_MC_STATIC_DA_MGMT_PO (0xe << 0) +#define MVSW_G1_ATU_DATA_STATE_MC_STATIC_PO (0xf << 0) + +#define MVSW_G1_ATU_MAC_BASE 0x0d +#define MVSW_G1_ATU_MAC_01 (MVSW_G1_ATU_MAC_BASE + 0) +#define MVSW_G1_ATU_MAC_23 (MVSW_G1_ATU_MAC_BASE + 1) +#define MVSW_G1_ATU_MAC_45 
(MVSW_G1_ATU_MAC_BASE + 2) + +/* Monitor & MGMT Control */ +#define MVSW_G1_MONITOR_MGMT_CTL 0x1a +#define MVSW_G1_MONITOR_MGMT_CTL_UPDATE (0x1 << 15) +#define MVSW_G1_MONITOR_MGMT_CTL_PTR_MASK (0x3f << 8) +#define MVSW_G1_MONITOR_MGMT_CTL_PTR_0180C200000XLO (0x00 << 8) +#define MVSW_G1_MONITOR_MGMT_CTL_PTR_0180C200000XHI (0x01 << 8) +#define MVSW_G1_MONITOR_MGMT_CTL_PTR_0180C200002XLO (0x02 << 8) +#define MVSW_G1_MONITOR_MGMT_CTL_PTR_0180C200002XHI (0x03 << 8) +#define MVSW_G1_MONITOR_MGMT_CTL_PTR_INGRESS_DEST (0x20 << 8) +#define MVSW_G1_MONITOR_MGMT_CTL_PTR_EGRESS_DEST (0x21 << 8) +#define MVSW_G1_MONITOR_MGMT_CTL_PTR_CPU_DEST (0x30 << 8) +#define MVSW_G1_MONITOR_MGMT_CTL_DATA_SHIFT 0 +#define MVSW_G1_MONITOR_MGMT_CTL_DATA_MASK 0xff +#define MVSW_G1_MONITOR_MGMT_CTL_PTR_CPU_DEST_MGMTPRI 0xe0 + +/* Control2 */ +#define MVSW_G1_CTRL2 0x1c +#define MVSW_G1_CTRL2_DEVICE_NUMBER_SHIFT 0 +#define MVSW_G1_CTRL2_DEVICE_NUMBER_MASK 0x1f +#define MVSW_G1_CTRL2_RMU_MODE_MASK (0x7 << 8) +#define MVSW_G1_CTRL2_RMU_MODE_PORT_0 (0x0 << 8) +#define MVSW_G1_CTRL2_RMU_MODE_PORT_1 (0x1 << 8) +#define MVSW_G1_CTRL2_RMU_MODE_ALL_DSA (0x6 << 8) +#define MVSW_G1_CTRL2_RMU_MODE_DISABLED (0x7 << 8) + +/* + * Global2 registers + */ + +/* Trunk Mask Table */ +#define MVSW_G2_TRUNK_MASK 0x07 +#define MVSW_G2_TRUNK_MASK_UPDATE (0x1 << 15) +#define MVSW_G2_TRUNK_MASK_SHIFT 12 +#define MVSW_G2_TRUNK_MASK_COUNT 8 /* 0x0 to 0x7 */ +#define MVSW_G2_TRUNK_MASK_HASH (0x1 << 11) +/* low bits are a bitmap of ports in the trunk i think */ + +/* Trunk Mapping Table */ +#define MVSW_G2_TRUNK_MAPPING 0x08 +#define MVSW_G2_TRUNK_MAPPING_UPDATE (0x1 << 15) +#define MVSW_G2_TRUNK_MAPPING_ID_SHIFT 11 +#define MVSW_G2_TRUNK_MAPPING_ID_COUNT 16 /* 0x0 to 0xf */ +/* low bits are a bitmap of ports in the trunk i think */ + +/* Ingress Rate Command */ +#define MVSW_G2_IRL_CMD 0x09 +#define MVSW_G2_IRL_CMD_BUSY (0x1 << 15) +#define MVSW_G2_IRL_CMD_OP_MASK (0x7 << 12) +#define MVSW_G2_IRL_CMD_OP_NOOP (0x0 << 12) +#define MVSW_G2_IRL_CMD_OP_INIT_ALL (0x1 << 12) +#define MVSW_G2_IRL_CMD_OP_INIT_RES (0x2 << 12) +#define MVSW_G2_IRL_CMD_OP_WRITE_REG (0x3 << 12) +#define MVSW_G2_IRL_CMD_OP_READ_REG (0x4 << 12) +#define MVSW_G2_IRL_CMD_PORT_SHIFT 8 +#define MVSW_G2_IRL_CMD_PORT_MASK 0xf +#define MVSW_G2_IRL_CMD_RES_MASK (0x7 << 5) +#define MVSW_G2_IRL_CMD_REG_MASK (0xf << 0) + +/* Ingress Rate Data */ +#define MVSW_G2_IRL_DATA 0x0a + #define MVSW_G2_SMI_PHY_CMD 0x18 #define MVSW_G2_SMI_PHY_DATA 0x19 +/* Misc */ +#define MVSW_G2_MISC 0x1d +#define MVSW_G2_MISC_5BIT_PORT (0x1 << 14) + /* SERDES registers */ #define MVSW_SERDES(x) (0x10 + (x)) #define MVSW_SERDES_BMCR (0x2000 + MII_BMCR) +struct mvsw_tag { + uint16_t tag0; +#define MVSW_TAG_MODE_SHIFT 14 +#define MVSW_TAG_MODE_MASK (0x3 << MVSW_TAG_MODE_SHIFT) +#define MVSW_TAG_MODE_TO_CPU (0x0 << MVSW_TAG_MODE_SHIFT) +#define MVSW_TAG_MODE_FROM_CPU (0x1 << MVSW_TAG_MODE_SHIFT) +#define MVSW_TAG_MODE_TO_SNIFFER (0x2 << MVSW_TAG_MODE_SHIFT) +#define MVSW_TAG_MODE_TAG (0x3 << MVSW_TAG_MODE_SHIFT) + +#define MVSW_TAG_IEEE (1 << 13) + +#define MVSW_TAG_SWITCH_SHIFT 8 +#define MVSW_TAG_SWITCH_MASK 0x1f + +#define MVSW_TAG_PORT_SHIFT 3 +#define MVSW_TAG_PORT_MASK 0x1f + + uint16_t tag1; +}; + +struct mvsw_etag { + uint16_t reserved; + uint16_t tag0; + uint16_t tag1; +}; + /* XXX #include */ #define MDIO_MMD_PHYXS 4 +/* + * The driver. 
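+ *
+ * mvsw(4) drives the switch itself over SMI.  Each front panel
+ * port described in the device tree attaches as an eport(4)
+ * Ethernet interface, while ports carrying an "ethernet" property
+ * are wired up as CPU ports.  Frames exchanged with a CPU port
+ * carry an EtherType DSA tag (struct mvsw_etag): mvsw_p_input()
+ * strips the tag and demuxes the packet to the matching eport
+ * interface, and eport_transmit() prepends it on the way out.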
+ */ + +struct mvsw_port { + int p_port; + struct mvsw_softc *p_softc; + struct ifnet *p_ifp0; + + int (*p_ioctl)(struct ifnet *, u_long, caddr_t); + void (*p_input)(struct ifnet *, struct mbuf *); + int (*p_output)(struct ifnet *, struct mbuf *, struct sockaddr *, + struct rtentry *); + + TAILQ_ENTRY(mvsw_port) p_entry; +}; +TAILQ_HEAD(mvsw_ports, mvsw_port); + +struct eport_softc; +static void eport_input(struct eport_softc *, struct mbuf *); + struct mvsw_softc { - struct device sc_dev; + struct device sc_dev; - struct mii_bus *sc_mdio; - int sc_reg; + int sc_node; + struct mii_bus *sc_mdio; + int sc_reg; + + unsigned int sc_nports; + struct eport_softc *sc_ports[MVSW_MAX_PORTS]; + struct mvsw_ports sc_cpus; + + caddr_t sc_bpf; }; +#define DEVNAME(_sc) ((_sc)->sc_dev.dv_xname) + int mvsw_match(struct device *, void *, void *); void mvsw_attach(struct device *, struct device *, void *); @@ -88,6 +391,23 @@ struct cfdriver mvsw_cd = { NULL, "mvsw", DV_DULL }; +struct mvsw_defer { + struct task d_task; + struct mvsw_softc *d_sc; +}; + +static void mvsw_attach_deferred(void *); + +static void mvsw_attach_cpu(struct mvsw_softc *, int, uint32_t); +static void mvsw_config_cpu(struct mvsw_softc *, struct mvsw_port *); + +static int mvsw_p_ioctl(struct ifnet *, u_long, caddr_t); +static void mvsw_p_input(struct ifnet *, struct mbuf *); +static int mvsw_p_output(struct ifnet *, struct mbuf *, + struct sockaddr *, struct rtentry *); + +static int mvsw_print(void *, const char *); + int mvsw_smi_read(struct mvsw_softc *, int, int); void mvsw_smi_write(struct mvsw_softc *, int, int, int); int mvsw_phy_read(struct mvsw_softc *, int, int); @@ -95,7 +415,25 @@ void mvsw_phy_write(struct mvsw_softc *, int mvsw_serdes_read(struct mvsw_softc *, int, int, int); void mvsw_serdes_write(struct mvsw_softc *, int, int, int, int); -void mvsw_port_enable(struct mvsw_softc *, int); +static int mvsw_wait(struct mvsw_softc *, int, int, uint16_t, uint16_t, + const char *); + +#define mvsw_vtu_wait(_sc) \ + mvsw_wait((_sc), MVSW_G1, MVSW_G1_VTU_OP, \ + MVSW_G1_VTU_OP_BUSY, 0, "mvswvtu") + +#define mvsw_atu_wait(_sc) \ + mvsw_wait((_sc), MVSW_G1, MVSW_G1_ATU_OP, \ + MVSW_G1_ATU_OP_BUSY, 0, "mvswatu") + +#define mvsw_irl_wait(_sc) \ + mvsw_wait((_sc), MVSW_G2, MVSW_G2_IRL_CMD, \ + MVSW_G2_IRL_CMD_BUSY, 0, "mvswirl") + +static int mvsw_vtu_op(struct mvsw_softc *, uint16_t); +static int mvsw_atu_op(struct mvsw_softc *, uint16_t, uint16_t, uint16_t); +static int mvsw_irl_op(struct mvsw_softc *, uint16_t); + void mvsw_phy_enable(struct mvsw_softc *, int); void mvsw_serdes_enable(struct mvsw_softc *, int); @@ -112,15 +450,18 @@ mvsw_attach(struct device *parent, struc { struct mvsw_softc *sc = (struct mvsw_softc *)self; struct fdt_attach_args *faa = aux; - int ports, port, node; - uint32_t phy; - uint16_t swid; + uint16_t r; + struct mvsw_defer *d; + + TAILQ_INIT(&sc->sc_cpus); + sc->sc_nports = nitems(sc->sc_ports); if (faa->fa_nreg < 1) { printf(": no registers\n"); return; } + sc->sc_node = faa->fa_node; sc->sc_reg = faa->fa_reg[0].addr; printf(" phy %d", sc->sc_reg); @@ -130,25 +471,149 @@ mvsw_attach(struct device *parent, struc return; } - swid = mvsw_smi_read(sc, MVSW_PORT(0), MVSW_PORT_SWITCHID); - switch (swid & MVSW_PORT_SWITCHID_PROD_MASK) { + r = mvsw_smi_read(sc, MVSW_PORT(0), MVSW_PORT_SWITCHID); + switch (r & MVSW_PORT_SWITCHID_PROD_MASK) { case MVSW_PORT_SWITCHID_PROD_88E6141: + sc->sc_nports = 6; printf(": 88E6141"); break; case MVSW_PORT_SWITCHID_PROD_88E6341: + sc->sc_nports = 6; printf(": 88E6341"); 
break; default: printf(": unknown product 0x%04x\n", - swid & MVSW_PORT_SWITCHID_PROD_MASK); + r & MVSW_PORT_SWITCHID_PROD_MASK); return; } - printf(" rev %d\n", swid & MVSW_PORT_SWITCHID_REV_MASK); + printf(" rev %d\n", r & MVSW_PORT_SWITCHID_REV_MASK); - ports = OF_getnodebyname(faa->fa_node, "ports"); - if (ports == 0) + if (sc->sc_dev.dv_unit & ~MVSW_G1_CTRL2_DEVICE_NUMBER_MASK) { + printf("%s: too many switches\n", DEVNAME(sc)); return; + } + + /* + * wait until the cpu port is (probably) attached to wire things up. + */ + + d = malloc(sizeof(*d), M_TEMP, M_WAITOK); + task_set(&d->d_task, mvsw_attach_deferred, d); + d->d_sc = sc; + + config_pending_incr(); + task_add(systq, &d->d_task); +} + +static void +mvsw_attach_deferred(void *arg) +{ + struct mvsw_defer *d = arg; + struct mvsw_softc *sc = d->d_sc; + int ports, port, node, i; + uint32_t phy, phandle; + uint16_t r; + struct mvsw_port *p; + + free(d, M_TEMP, sizeof(*d)); + + for (port = 0; port < sc->sc_nports; port++) { + /* start with all ports disabled */ + r = mvsw_smi_read(sc, MVSW_PORT(port), MVSW_PORT_CTRL0); + CLR(r, MVSW_PORT_CTRL0_STATE_MASK); + SET(r, MVSW_PORT_CTRL0_STATE_DISABLED); + mvsw_smi_write(sc, MVSW_PORT(port), MVSW_PORT_CTRL0, r); + + r = mvsw_smi_read(sc, MVSW_PORT(port), MVSW_PORT_CTRL1); + CLR(r, MVSW_PORT_CTRL1_MESSAGE_PORT); + mvsw_smi_write(sc, MVSW_PORT(port), MVSW_PORT_CTRL1, r); + + mvsw_smi_write(sc, MVSW_PORT(port), MVSW_PORT_DEFAULT_VLAN, 0); + + /* reset ingress rate limiting (IRL) */ + if (mvsw_irl_op(sc, MVSW_G2_IRL_CMD_OP_INIT_ALL | + (port << MVSW_G2_IRL_CMD_PORT_SHIFT)) == -1) { + printf("%s: unable to reset ingress rate limiting " + "on port %u\n", DEVNAME(sc), port); + /* we can carry on */ + } + } + + /* flush the vlan translation unit */ + if (mvsw_vtu_wait(sc) == -1) { + printf("%s: VLAN Translation Unit busy\n", DEVNAME(sc)); + goto done; + } + + if (mvsw_vtu_op(sc, MVSW_G1_VTU_OP_FLUSH_ALL) == -1) { + printf("%s: VLAN Translation Unit flush timeout\n", + DEVNAME(sc)); + goto done; + } + + /* clear 5 bit port use in port vlan table (PVT) */ + r = mvsw_smi_read(sc, MVSW_G2, MVSW_G2_MISC); + CLR(r, MVSW_G2_MISC_5BIT_PORT); + mvsw_smi_write(sc, MVSW_G2, MVSW_G2_MISC, r); + + /* XXX PVT clear/reset/setup? */ + + /* flush the address translation unit */ + if (mvsw_atu_wait(sc) == -1) { + printf("%s: Address Translation Unit busy\n", DEVNAME(sc)); + goto done; + } + + if (mvsw_atu_op(sc, 0, MVSW_G1_ATU_OP_FLUSH_MOVE_ALL, 0) == -1) { + printf("%s: Address Translation Unit flush timeout\n", + DEVNAME(sc)); + goto done; + } + + /* XXX clear priority overrite table */ + + r = mvsw_smi_read(sc, MVSW_G1, MVSW_G1_CTRL2); + /* set device number */ + CLR(r, MVSW_G1_CTRL2_DEVICE_NUMBER_MASK << + MVSW_G1_CTRL2_DEVICE_NUMBER_SHIFT); + SET(r, sc->sc_dev.dv_unit << + MVSW_G1_CTRL2_DEVICE_NUMBER_SHIFT); + + /* disable remote management */ + CLR(r, MVSW_G1_CTRL2_RMU_MODE_MASK); + SET(r, MVSW_G1_CTRL2_RMU_MODE_DISABLED); + mvsw_smi_write(sc, MVSW_G1, MVSW_G1_CTRL2, r); + + /* clear trunk setup */ + for (i = 0; i < MVSW_G2_TRUNK_MASK_COUNT; i++) { + /* XXX dlg - this stops packets, needs more investigation. */ +#if 0 + mvsw_smi_write(sc, MVSW_G2, MVSW_G2_TRUNK_MASK, + MVSW_G2_TRUNK_MASK_UPDATE | + (i << MVSW_G2_TRUNK_MASK_SHIFT) | /* clear bitmap */ 0); +#endif + } + for (i = 0; i < MVSW_G2_TRUNK_MAPPING_ID_COUNT; i++) { + /* XXX dlg - this stops packets, needs more investigation. 
*/ +#if 0 + mvsw_smi_write(sc, MVSW_G2, MVSW_G2_TRUNK_MAPPING, + MVSW_G2_TRUNK_MAPPING_UPDATE | + (i << MVSW_G2_TRUNK_MAPPING_ID_SHIFT) | + /* clear bitmap */ 0); +#endif + } + + ports = OF_getnodebyname(sc->sc_node, "ports"); + if (ports == 0) + goto done; + for (port = OF_child(ports); port; port = OF_peer(port)) { + char status[32]; + + if (OF_getprop(node, "status", status, sizeof(status)) > 0 && + strcmp(status, "disabled") == 0) + continue; + phy = OF_getpropint(port, "phy-handle", 0); node = OF_getnodebyphandle(phy); if (node) @@ -156,8 +621,270 @@ mvsw_attach(struct device *parent, struc else mvsw_serdes_enable(sc, port); - mvsw_port_enable(sc, port); + phandle = OF_getpropint(port, "ethernet", 0); + if (phandle != 0) + mvsw_attach_cpu(sc, port, phandle); + else { + uint32_t reg = OF_getpropint(port, "reg", + MVSW_MAX_PORTS); + struct device *child; + + if (reg < sc->sc_nports) { + child = config_found(&sc->sc_dev, &port, + mvsw_print); + sc->sc_ports[reg] = + (struct eport_softc *)child; + } + } } + + p = TAILQ_FIRST(&sc->sc_cpus); + if (p == NULL) { + printf("%s: no CPU ports found\n", DEVNAME(sc)); + goto done; + } + + mvsw_config_cpu(sc, p); + + r = 0x1 << p->p_port; + for (port = 0; port < sc->sc_nports; port++) { + if (sc->sc_ports[port] == NULL) + continue; + + mvsw_smi_write(sc, MVSW_PORT(port), + MVSW_PORT_BASED_VLAN, r); + } + +done: + config_pending_decr(); +} + +static void +mvsw_attach_cpu(struct mvsw_softc *sc, int node, uint32_t phandle) +{ + struct ifnet *ifp0; + struct arpcom *ac0; + struct mvsw_port *p; + int port; + uint16_t r; + + port = OF_getpropint(node, "reg", -1); + if (port == -1) { + printf("%s: can't find cpu interface port number\n", + DEVNAME(sc)); + return; + } + + ifp0 = if_byphandle(phandle); + if (ifp0 == NULL) { + printf("%s: unable to find cpu interface on port %u\n", + DEVNAME(sc), port); + return; + } + + if (ifp0->if_type != IFT_ETHER) { + printf("%s: unsupported type of cpu interface on port %u\n", + DEVNAME(sc), port); + return; + } + + printf("%s: %s at port %u\n", DEVNAME(sc), ifp0->if_xname, port); + + NET_LOCK(); + ac0 = (struct arpcom *)ifp0; + if (ac0->ac_trunkport != NULL) { + printf("%s: cpu interface %s is busy\n", + DEVNAME(sc), ifp0->if_xname); + NET_UNLOCK(); + return; + } + + p = malloc(sizeof(*p), M_DEVBUF, M_WAITOK); + + p->p_softc = sc; + p->p_ifp0 = ifp0; + p->p_port = port; + p->p_ioctl = ifp0->if_ioctl; + p->p_input = ifp0->if_input; + p->p_output = ifp0->if_output; + + TAILQ_INSERT_TAIL(&sc->sc_cpus, p, p_entry); + + if (ifpromisc(ifp0, 1) != 0) + printf("%s: %s promisc error\n", DEVNAME(sc), ifp0->if_xname); + + ac0->ac_trunkport = p; + /* membar_producer()? */ + ifp0->if_ioctl = mvsw_p_ioctl; + ifp0->if_input = mvsw_p_input; + ifp0->if_output = mvsw_p_output; + NET_UNLOCK(); + + /* Enable port. 
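+ * The CPU port is put into forwarding state with EtherType DSA
+ * (ETAG) frame and egress modes, so every frame exchanged with
+ * the host names the switch and front panel port in its tag.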
*/ + r = mvsw_smi_read(sc, MVSW_PORT(port), MVSW_PORT_CTRL0); + CLR(r, MVSW_PORT_CTRL0_STATE_MASK); + SET(r, MVSW_PORT_CTRL0_STATE_FORWARD); + CLR(r, MVSW_PORT_CTRL0_FRAME_MODE_MASK); + SET(r, MVSW_PORT_CTRL0_FRAME_MODE_ETAG); + CLR(r, MVSW_PORT_CTRL0_EGRESS_MODE_MASK); + SET(r, MVSW_PORT_CTRL0_EGRESS_MODE_ETAG); + SET(r, MVSW_PORT_CTRL0_EGRESS_FLOOD_UCAST | + MVSW_PORT_CTRL0_EGRESS_FLOOD_MCAST); + mvsw_smi_write(sc, MVSW_PORT(port), MVSW_PORT_CTRL0, r); + + r = mvsw_smi_read(sc, MVSW_PORT(port), MVSW_PORT_CTRL2); + CLR(r, MVSW_PORT_CTRL2_MAP_DA); + CLR(r, MVSW_PORT_CTRL2_JUMBO_MODE_MASK); + SET(r, MVSW_PORT_CTRL2_JUMBO_MODE_10240); + CLR(r, MVSW_PORT_CTRL2_8021Q_MODE_MASK); + SET(r, MVSW_PORT_CTRL2_8021Q_MODE_DISABLED); + CLR(r, MVSW_PORT_CTRL2_DISCARD_TAGGED); + CLR(r, MVSW_PORT_CTRL2_DISCARD_UNTAGGED); + mvsw_smi_write(sc, MVSW_PORT(port), MVSW_PORT_CTRL2, r); + + mvsw_smi_write(sc, MVSW_PORT(port), MVSW_PORT_ASSOC_VECTOR, + 0x1 << port); + + mvsw_smi_write(sc, MVSW_PORT(port), MVSW_PORT_ETH_TYPE, + ETHERTYPE_MVSW_ETAG); +} + +static void +mvsw_config_cpu(struct mvsw_softc *sc, struct mvsw_port *p) +{ + int port = p->p_port; + uint16_t r; + + /* tell the switch this is the cpu port */ + r = MVSW_G1_MONITOR_MGMT_CTL_UPDATE | + MVSW_G1_MONITOR_MGMT_CTL_PTR_CPU_DEST | + (port | MVSW_G1_MONITOR_MGMT_CTL_PTR_CPU_DEST_MGMTPRI); + mvsw_smi_write(sc, MVSW_G2, MVSW_G1_MONITOR_MGMT_CTL, r); + + r = MVSW_G1_MONITOR_MGMT_CTL_UPDATE | + MVSW_G1_MONITOR_MGMT_CTL_PTR_INGRESS_DEST | + port; + mvsw_smi_write(sc, MVSW_G2, MVSW_G1_MONITOR_MGMT_CTL, r); + + r = MVSW_G1_MONITOR_MGMT_CTL_UPDATE | + MVSW_G1_MONITOR_MGMT_CTL_PTR_EGRESS_DEST | + port; + mvsw_smi_write(sc, MVSW_G2, MVSW_G1_MONITOR_MGMT_CTL, r); +} + +static int +mvsw_p_ioctl(struct ifnet *ifp0, u_long cmd, caddr_t data) +{ + struct arpcom *ac0 = (struct arpcom *)ifp0; + struct mvsw_port *p = ac0->ac_trunkport; + int error = 0; + + switch (cmd) { + case SIOCGTRUNKPORT: { + struct trunk_reqport *rp = (struct trunk_reqport *)data; + struct mvsw_softc *sc = p->p_softc; + + if (strncmp(rp->rp_ifname, rp->rp_portname, + sizeof(rp->rp_ifname)) != 0) + return (EINVAL); + + (void)strlcpy(rp->rp_ifname, DEVNAME(sc), + sizeof(rp->rp_ifname)); + break; + } + + case SIOCSIFLLADDR: + error = EBUSY; + break; + + default: + error = (*p->p_ioctl)(ifp0, cmd, data); + break; + } + + return (error); +} + +static int +mvsw_p_output(struct ifnet *ifp0, struct mbuf *m, struct sockaddr *dst, + struct rtentry *rt) +{ + struct arpcom *ac0 = (struct arpcom *)ifp0; + struct mvsw_port *p = ac0->ac_trunkport; + +#if 0 + /* restrict transmission to bpf only */ + if (m_tag_find(m, PACKET_TAG_DLT, NULL) == NULL) { + m_freem(m); + return (EBUSY); + } +#endif + + return ((*p->p_output)(ifp0, m, dst, rt)); +} + +static void +mvsw_p_input(struct ifnet *ifp0, struct mbuf *m) +{ + struct arpcom *ac0 = (struct arpcom *)ifp0; + struct mvsw_port *p = ac0->ac_trunkport; + struct mvsw_softc *sc = p->p_softc; + struct ether_header *eh; + struct mvsw_etag *etag; + int hlen = sizeof(*eh) + sizeof(*etag); + int diff = hlen - offsetof(struct ether_header, ether_type); + uint16_t tag0; + struct eport_softc *eport; + + eh = mtod(m, struct ether_header *); + if (eh->ether_type != htons(ETHERTYPE_MVSW_ETAG)) + goto drop; + + if (m->m_len < hlen) { + m = m_pullup(m, hlen); + if (m == NULL) { + /* drop++ */ + return; + } + + eh = mtod(m, struct ether_header *); + } + + etag = (struct mvsw_etag *)(eh + 1); + tag0 = bemtoh16(&etag->tag0); + + int port = (tag0 >> MVSW_TAG_PORT_SHIFT) & 
MVSW_TAG_PORT_MASK; + if (port >= sc->sc_nports) + goto drop; + + eport = sc->sc_ports[port]; + if (eport == NULL) + goto drop; + + memmove(mtod(m, caddr_t) + diff, mtod(m, caddr_t), + offsetof(struct ether_header, ether_type)); + m_adj(m, diff); + + eport_input(eport, m); + return; + +drop: + m_freem(m); +} + +static int +mvsw_print(void *aux, const char *pnp) +{ + int node = *(int *)aux; + int port; + + if (pnp != NULL) + printf("\"port\" at %s", pnp); + + port = OF_getpropint(node, "reg", 0); + printf(" port %d", port); + + return (UNCONF); } static inline int @@ -219,6 +946,53 @@ mvsw_smi_write(struct mvsw_softc *sc, in mvsw_smi_wait(sc); } +static int +mvsw_wait(struct mvsw_softc *sc, int phy, int reg, uint16_t mask, uint16_t v, + const char *wmesg) +{ + unsigned int i; + uint16_t r; + + for (i = 0; i < 16; i++) { + r = mvsw_smi_read(sc, phy, reg); + if ((r & mask) == v) + return (0); + + tsleep_nsec(&sc->sc_mdio, PPAUSE, wmesg, 1500000); + } + + return (-1); +} + +static int +mvsw_vtu_op(struct mvsw_softc *sc, uint16_t op) +{ + mvsw_smi_write(sc, MVSW_G1, MVSW_G1_VTU_OP, + MVSW_G1_VTU_OP_BUSY | op); + + return (mvsw_vtu_wait(sc)); +} + +static int +mvsw_atu_op(struct mvsw_softc *sc, uint16_t fid, uint16_t op, uint16_t data) +{ + mvsw_smi_write(sc, MVSW_G1, MVSW_G1_ATU_DATA, data); + mvsw_smi_write(sc, MVSW_G1, MVSW_G1_ATU_FID, fid); + mvsw_smi_write(sc, MVSW_G1, MVSW_G1_VTU_OP, + MVSW_G1_VTU_OP_BUSY | op); + + return (mvsw_atu_wait(sc)); +} + +static int +mvsw_irl_op(struct mvsw_softc *sc, uint16_t op) +{ + mvsw_smi_write(sc, MVSW_G2, MVSW_G2_IRL_CMD, + MVSW_G2_IRL_CMD_BUSY | op); + + return (mvsw_irl_wait(sc)); +} + int mvsw_phy_wait(struct mvsw_softc *sc) { @@ -308,23 +1082,6 @@ mvsw_serdes_write(struct mvsw_softc *sc, } void -mvsw_port_enable(struct mvsw_softc *sc, int node) -{ - uint16_t val; - int port; - - port = OF_getpropint(node, "reg", -1); - if (port == -1) - return; - - /* Enable port. 
*/ - val = mvsw_smi_read(sc, MVSW_PORT(port), MVSW_PORT_CTRL); - val &= ~MVSW_PORT_CTRL_STATE_MASK; - val |= MVSW_PORT_CTRL_STATE_FORWARD; - mvsw_smi_write(sc, MVSW_PORT(port), MVSW_PORT_CTRL, val); -} - -void mvsw_phy_enable(struct mvsw_softc *sc, int node) { uint16_t val; @@ -357,4 +1114,395 @@ mvsw_serdes_enable(struct mvsw_softc *sc val |= BMCR_AUTOEN; mvsw_serdes_write(sc, MVSW_SERDES(port), MDIO_MMD_PHYXS, MVSW_SERDES_BMCR, val); +} + +struct eport_softc { + struct device sc_dev; + int sc_node; + int sc_port; + + struct arpcom sc_ac; +#define sc_if sc_ac.ac_if + + struct mii_bus *sc_mdio; + struct mii_data sc_mii; +#define sc_ifmedia sc_mii.mii_media + struct mvsw_softc *sc_parent; + + struct if_device sc_ifd; +}; + +static int eport_match(struct device *, void *, void *); +static void eport_attach(struct device *, struct device *, void *); + +const struct cfattach eport_ca = { + sizeof (struct eport_softc), eport_match, eport_attach +}; + +struct cfdriver eport_cd = { + NULL, "eport", DV_IFNET +}; + +static int eport_enqueue(struct ifnet *, struct mbuf *); +static void eport_start(struct ifqueue *); +static int eport_ioctl(struct ifnet *, u_long, caddr_t); + +static int eport_up(struct eport_softc *); +static int eport_down(struct eport_softc *); + +static int eport_miibus_readreg(struct device *, int, int); +static void eport_miibus_writereg(struct device *, int, int, int); +static void eport_miibus_statch(struct device *); + +static int eport_media_upd(struct ifnet *); +static void eport_media_sts(struct ifnet *, struct ifmediareq *); + +static uint16_t eport_smi_read(struct eport_softc *, int); +static void eport_smi_write(struct eport_softc *, int, uint16_t); + +static int +eport_match(struct device *parent, void *match, void *aux) +{ + return (1); +} + +static void +eport_attach(struct device *parent, struct device *self, void *aux) +{ + struct eport_softc *sc = (struct eport_softc *)self; + int node = *(int *)aux; + struct ifnet *ifp; + int phyph, phynode; + int port; + uint16_t r; + + sc->sc_node = node; + sc->sc_port = port = OF_getpropint(node, "reg", -1); + + ifp = &sc->sc_if; + (void)strlcpy(ifp->if_xname, DEVNAME(sc), sizeof(ifp->if_xname)); + ifp->if_softc = sc; + ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; + ifp->if_ioctl = eport_ioctl; + ifp->if_enqueue = eport_enqueue; + ifp->if_qstart = eport_start; + ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST; + ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE; + + OF_getprop(node, "label", + ifp->if_description, sizeof(ifp->if_description)); + + if (OF_getprop(node, "local-mac-address", &sc->sc_ac.ac_enaddr, + sizeof(sc->sc_ac.ac_enaddr)) != sizeof(sc->sc_ac.ac_enaddr)) + ether_fakeaddr(ifp); + + printf(": address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr)); + + phyph = OF_getpropint(node, "phy-handle", 0); + phynode = OF_getnodebyphandle(phyph); + if (phynode != 0) { +#ifdef notyet + int phyloc = OF_getpropint(phynode, "reg", 0); +#endif + struct mii_data *mii = &sc->sc_mii; + + mii->mii_ifp = ifp; + mii->mii_readreg = eport_miibus_readreg; + mii->mii_writereg = eport_miibus_writereg; + mii->mii_statchg = eport_miibus_statch; + + ifmedia_init(&sc->sc_ifmedia, 0, + eport_media_upd, eport_media_sts); + +#ifdef notyet + mii_attach(self, mii, 0xffffffff, phyloc, MII_OFFSET_ANY, 0); +#else + LIST_INIT(&mii->mii_phys); +#endif + if (LIST_FIRST(&mii->mii_phys) == NULL) { +#ifdef notyet + printf("%s: no PHY found!\n", DEVNAME(sc)); +#endif + ifmedia_add(&sc->sc_ifmedia, IFM_ETHER|IFM_MANUAL, + 0, NULL); + ifmedia_set(&sc->sc_ifmedia, 
IFM_ETHER|IFM_MANUAL); + } else + ifmedia_set(&sc->sc_ifmedia, IFM_ETHER|IFM_AUTO); + } + + if_counters_alloc(ifp); + if_attach(ifp); + ether_ifattach(ifp); + + sc->sc_ifd.if_node = sc->sc_node; + sc->sc_ifd.if_ifp = ifp; + if_register(&sc->sc_ifd); + + /* Configure port. */ + r = eport_smi_read(sc, MVSW_PORT_CTRL0); + CLR(r, MVSW_PORT_CTRL0_STATE_MASK); + SET(r, MVSW_PORT_CTRL0_STATE_DISABLED); + CLR(r, MVSW_PORT_CTRL0_FRAME_MODE_MASK); + SET(r, MVSW_PORT_CTRL0_FRAME_MODE_NORMAL); + CLR(r, MVSW_PORT_CTRL0_EGRESS_MODE_MASK); + SET(r, MVSW_PORT_CTRL0_EGRESS_MODE_UNMODIFIED); + SET(r, MVSW_PORT_CTRL0_EGRESS_FLOOD_UCAST | + MVSW_PORT_CTRL0_EGRESS_FLOOD_MCAST); + eport_smi_write(sc, MVSW_PORT_CTRL0, r); + + r = eport_smi_read(sc, MVSW_PORT_CTRL2); + CLR(r, MVSW_PORT_CTRL2_MAP_DA); + CLR(r, MVSW_PORT_CTRL2_JUMBO_MODE_MASK); + SET(r, MVSW_PORT_CTRL2_JUMBO_MODE_10240); + CLR(r, MVSW_PORT_CTRL2_8021Q_MODE_MASK); + SET(r, MVSW_PORT_CTRL2_8021Q_MODE_DISABLED); + CLR(r, MVSW_PORT_CTRL2_DISCARD_TAGGED); + CLR(r, MVSW_PORT_CTRL2_DISCARD_UNTAGGED); + eport_smi_write(sc, MVSW_PORT_CTRL2, r); + + eport_smi_write(sc, MVSW_PORT_ASSOC_VECTOR, 0); + + eport_smi_write(sc, MVSW_PORT_ETH_TYPE, ETHERTYPE_MVSW_DEFAULT); +} + +static uint16_t +eport_smi_read(struct eport_softc *sc, int reg) +{ + return mvsw_smi_read((struct mvsw_softc *)sc->sc_dev.dv_parent, + MVSW_PORT(sc->sc_port), reg); +} + +static void +eport_smi_write(struct eport_softc *sc, int reg, uint16_t r) +{ + mvsw_smi_write((struct mvsw_softc *)sc->sc_dev.dv_parent, + MVSW_PORT(sc->sc_port), reg, r); +} + +static int +eport_miibus_readreg(struct device *dev, int phy, int reg) +{ + struct device *parent = dev->dv_parent; + struct mvsw_softc *sc = (struct mvsw_softc *)parent; + int v; + + v = mvsw_phy_read(sc, phy, reg); + + /* internal phy doesnt report a useful model number */ + if (reg == MII_PHYIDR2 && MII_MODEL(v) == 0) + v |= 0x0002 << 4; + + return (v); +} + +static void +eport_miibus_writereg(struct device *dev, int phy, int reg, int val) +{ + struct device *parent = dev->dv_parent; + struct mvsw_softc *sc = (struct mvsw_softc *)parent; + + return (mvsw_phy_write(sc, phy, reg, val)); +} + +static void +eport_miibus_statch(struct device *dev) +{ + printf("%s: %s[%u]\n", dev->dv_xname, __func__, __LINE__); +} + +static int +eport_media_upd(struct ifnet *ifp) +{ + struct eport_softc *sc = ifp->if_softc; + + if (LIST_FIRST(&sc->sc_mii.mii_phys)) + mii_mediachg(&sc->sc_mii); + + return (0); +} + +static void +eport_media_sts(struct ifnet *ifp, struct ifmediareq *ifmr) +{ + struct eport_softc *sc = ifp->if_softc; + + if (LIST_FIRST(&sc->sc_mii.mii_phys)) { + mii_pollstat(&sc->sc_mii); + ifmr->ifm_active = sc->sc_mii.mii_media_active; + ifmr->ifm_status = sc->sc_mii.mii_media_status; + } +} + +static int +eport_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct eport_softc *sc = ifp->if_softc; +#ifdef notyet + struct ifreq *ifr = (struct ifreq *)data; +#endif + int error = 0; + + switch (cmd) { + case SIOCSIFFLAGS: + if (ISSET(ifp->if_flags, IFF_UP)) { + if (!ISSET(ifp->if_flags, IFF_RUNNING)) + error = eport_up(sc); + } else { + if (ISSET(ifp->if_flags, IFF_RUNNING)) + error = eport_down(sc); + } + break; + +#ifdef notyet + case SIOCSIFMEDIA: + case SIOCGIFMEDIA: + error = ifmedia_ioctl(ifp, ifr, &sc->sc_ifmedia, cmd); + break; +#endif + + default: + error = ether_ioctl(ifp, &sc->sc_ac, cmd, data); + break; + } + + if (error == ENETRESET) { + /* hardware doesnt need reprogramming */ + error = 0; + } + + return (error); +} + +static int 
+eport_up(struct eport_softc *sc) +{ + struct ifnet *ifp = &sc->sc_if; + uint16_t r; + + r = eport_smi_read(sc, MVSW_PORT_CTRL0); + CLR(r, MVSW_PORT_CTRL0_STATE_MASK); + SET(r, MVSW_PORT_CTRL0_STATE_FORWARD); + eport_smi_write(sc, MVSW_PORT_CTRL0, r); + + SET(ifp->if_flags, IFF_RUNNING); + + return (0); +} + +static int +eport_down(struct eport_softc *sc) +{ + struct ifnet *ifp = &sc->sc_if; + uint16_t r; + + CLR(ifp->if_flags, IFF_RUNNING); + + r = eport_smi_read(sc, MVSW_PORT_CTRL0); + CLR(r, MVSW_PORT_CTRL0_STATE_MASK); + SET(r, MVSW_PORT_CTRL0_STATE_DISABLED); + eport_smi_write(sc, MVSW_PORT_CTRL0, r); + + return (0); +} + +static unsigned int +eport_transmit(struct eport_softc *sc, struct mvsw_softc *ssc, + struct mvsw_port *p, struct mbuf *m) +{ + struct ether_header *eh; + struct mvsw_etag *etag; + const int hlen = sizeof(*eh) + sizeof(*etag); + const int offs = offsetof(struct ether_header, ether_type); + const int diff = hlen - offs; + +#if NBPFILTER > 0 + caddr_t if_bpf = sc->sc_if.if_bpf; + if (if_bpf) + bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT); +#endif + + m = m_prepend(m, diff, M_NOWAIT); + if (m == NULL) + return (ENOBUFS); + + if (m->m_len < hlen) { + m = m_pullup(m, hlen); + if (m == NULL) + return (ENOBUFS); + } + + memmove(mtod(m, caddr_t), mtod(m, caddr_t) + diff, offs); + eh = mtod(m, struct ether_header *); + eh->ether_type = htons(ETHERTYPE_MVSW_ETAG); + + etag = (struct mvsw_etag *)(eh + 1); + etag->reserved = htons(0); + etag->tag0 = htons(MVSW_TAG_MODE_FROM_CPU | + (ssc->sc_dev.dv_unit << MVSW_TAG_SWITCH_SHIFT) | + (sc->sc_port << MVSW_TAG_PORT_SHIFT)); + etag->tag1 = htons(0); + + return (if_enqueue(p->p_ifp0, m)); +} + +static int +eport_enqueue(struct ifnet *ifp, struct mbuf *m) +{ + struct eport_softc *sc = ifp->if_softc; + struct ifqueue *ifq = &ifp->if_snd; + struct mvsw_softc *ssc = (struct mvsw_softc *)sc->sc_dev.dv_parent; + struct mvsw_port *p = TAILQ_FIRST(&ssc->sc_cpus); + int error; + + if (p == NULL || !ISSET(p->p_ifp0->if_flags, IFF_RUNNING)) { + m_freem(m); + return (ENETDOWN); + } + + if (!ifq_is_priq(ifq)) + return (if_enqueue_ifq(ifp, m)); + + counters_pkt(ifp->if_counters, + ifc_opackets, ifc_obytes, m->m_pkthdr.len); + + error = eport_transmit(sc, ssc, p, m); + if (error != 0) { + counters_inc(ifp->if_counters, ifc_oerrors); + return (error); + } + + return (0); +} + +static void +eport_start(struct ifqueue *ifq) +{ + struct ifnet *ifp = ifq->ifq_if; + struct eport_softc *sc = ifp->if_softc; + struct mbuf *m; + unsigned int errors = 0; + + struct mvsw_softc *ssc = (struct mvsw_softc *)sc->sc_dev.dv_parent; + struct mvsw_port *p = TAILQ_FIRST(&ssc->sc_cpus); + + if (p == NULL || !ISSET(p->p_ifp0->if_flags, IFF_RUNNING)) { + ifq_purge(ifq); + return; + } + + while ((m = ifq_dequeue(ifq)) != NULL) { + if (eport_transmit(sc, ssc, p, m) != 0) + errors++; + } + + if (errors) + counters_add(ifp->if_counters, ifc_oerrors, errors); +} + +static void +eport_input(struct eport_softc *sc, struct mbuf *m) +{ + struct ifnet *ifp = &sc->sc_if; + + if_vinput(ifp, m); } Index: sys/dev/ofw/ofw_misc.c =================================================================== RCS file: /cvs/src/sys/dev/ofw/ofw_misc.c,v retrieving revision 1.36 diff -u -p -r1.36 ofw_misc.c --- sys/dev/ofw/ofw_misc.c 25 Mar 2022 15:49:29 -0000 1.36 +++ sys/dev/ofw/ofw_misc.c 6 Jun 2022 06:05:22 -0000 @@ -119,6 +119,46 @@ regmap_read_4(struct regmap *rm, bus_siz return bus_space_read_4(rm->rm_tag, rm->rm_handle, offset); } +/* + * Network interface support. 
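
The transmit side is the mirror image: eport_transmit() grows the mbuf at the front with m_prepend(), pulls the MAC addresses forward, writes the switch EtherType plus three 16-bit tag words behind them, and hands the frame to the first registered CPU port. A compile-only sketch of that layout on a flat buffer, where the EtherType and tag0 arguments stand in for ETHERTYPE_MVSW_ETAG and the MVSW_TAG_* encoding defined outside this excerpt:

#include <string.h>
#include <stdint.h>
#include <arpa/inet.h>

#define ETHER_ADDR_LEN	6
#define TAG_DIFF	8	/* EtherType + reserved + tag0 + tag1 */

/*
 * 'frame' points at a normal Ethernet frame with at least TAG_DIFF bytes
 * of headroom in front of it (which is what m_prepend() provides).
 */
static uint8_t *
insert_tag(uint8_t *frame, uint16_t etype, uint16_t tag0)
{
	uint8_t *pkt = frame - TAG_DIFF;
	uint16_t w;

	/* pull dst+src forward; the original EtherType stays where it was */
	memmove(pkt, frame, 2 * ETHER_ADDR_LEN);

	w = htons(etype);
	memcpy(pkt + 12, &w, sizeof(w));	/* switch EtherType */
	memset(pkt + 14, 0, sizeof(w));		/* etag->reserved */
	w = htons(tag0);
	memcpy(pkt + 16, &w, sizeof(w));	/* FROM_CPU | switch | port */
	memset(pkt + 18, 0, sizeof(w));		/* etag->tag1 */

	return (pkt);
}
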
+ */ + +LIST_HEAD(, if_device) if_devices = + LIST_HEAD_INITIALIZER(if_devices); + +void +if_register(struct if_device *ifd) +{ + ifd->if_phandle = OF_getpropint(ifd->if_node, "phandle", 0); + + LIST_INSERT_HEAD(&if_devices, ifd, if_list); +} + +struct ifnet * +if_bynode(int node) +{ + struct if_device *ifd; + + LIST_FOREACH(ifd, &if_devices, if_list) { + if (ifd->if_node == node) + return (ifd->if_ifp); + } + + return (NULL); +} + +struct ifnet * +if_byphandle(uint32_t phandle) +{ + struct if_device *ifd; + + LIST_FOREACH(ifd, &if_devices, if_list) { + if (ifd->if_phandle == phandle) + return (ifd->if_ifp); + } + + return (NULL); +} /* * PHY support. Index: sys/dev/ofw/ofw_misc.h =================================================================== RCS file: /cvs/src/sys/dev/ofw/ofw_misc.h,v retrieving revision 1.24 diff -u -p -r1.24 ofw_misc.h --- sys/dev/ofw/ofw_misc.h 21 Mar 2022 19:22:40 -0000 1.24 +++ sys/dev/ofw/ofw_misc.h 6 Jun 2022 06:05:22 -0000 @@ -30,6 +30,23 @@ struct regmap *regmap_byphandle(uint32_t uint32_t regmap_read_4(struct regmap *, bus_size_t); void regmap_write_4(struct regmap *, bus_size_t, uint32_t); +/* Interface support */ + +struct ifnet; + +struct if_device { + int if_node; + struct ifnet *if_ifp; + + LIST_ENTRY(if_device) if_list; + uint32_t if_phandle; +}; + +void if_register(struct if_device *); + +struct ifnet *if_bynode(int); +struct ifnet *if_byphandle(uint32_t); + /* PHY support */ #define PHY_NONE 0 Index: sys/dev/pci/if_vmx.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_vmx.c,v retrieving revision 1.69 diff -u -p -r1.69 if_vmx.c --- sys/dev/pci/if_vmx.c 11 Mar 2022 18:00:50 -0000 1.69 +++ sys/dev/pci/if_vmx.c 6 Jun 2022 06:05:22 -0000 @@ -833,8 +833,8 @@ vmxnet3_intr(void *arg) } if (ifp->if_flags & IFF_RUNNING) { - vmxnet3_rxintr(sc, &sc->sc_q[0].rx); vmxnet3_txintr(sc, &sc->sc_q[0].tx); + vmxnet3_rxintr(sc, &sc->sc_q[0].rx); vmxnet3_enable_intr(sc, 0); } @@ -861,8 +861,8 @@ vmxnet3_intr_queue(void *arg) { struct vmxnet3_queue *q = arg; - vmxnet3_rxintr(q->sc, &q->rx); vmxnet3_txintr(q->sc, &q->tx); + vmxnet3_rxintr(q->sc, &q->rx); vmxnet3_enable_intr(q->sc, q->intr); return 1; Index: sys/kern/kern_clock.c =================================================================== RCS file: /cvs/src/sys/kern/kern_clock.c,v retrieving revision 1.103 diff -u -p -r1.103 kern_clock.c --- sys/kern/kern_clock.c 16 Feb 2022 08:01:32 -0000 1.103 +++ sys/kern/kern_clock.c 6 Jun 2022 06:05:24 -0000 @@ -49,6 +49,7 @@ #include #include #include +#include #if defined(GPROF) || defined(DDBPROF) @@ -140,6 +141,11 @@ hardclock(struct clockframe *frame) struct proc *p; struct cpu_info *ci = curcpu(); + LLTRACE_CPU(ci, lltrace_statclock, + CLKF_USERMODE(frame), CLKF_PC(frame)); + + LLTRACE_CPU(ci, lltrace_irq, LLTRACE_IRQ_LOCAL_TIMER, 0); + p = curproc; if (p && ((p->p_flag & (P_SYSTEM | P_WEXIT)) == 0)) { struct process *pr = p->p_p; @@ -179,17 +185,18 @@ hardclock(struct clockframe *frame) * If we are not the primary CPU, we're not allowed to do * any more work. */ - if (CPU_IS_PRIMARY(ci) == 0) - return; + if (CPU_IS_PRIMARY(ci)) { + tc_ticktock(); + ticks++; + jiffies++; - tc_ticktock(); - ticks++; - jiffies++; + /* + * Update the timeout wheel. + */ + timeout_hardclock_update(); + } - /* - * Update the timeout wheel. 
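
Stepping back to the ofw_misc.c and ofw_misc.h hunks above: if_register() records an ifnet against its device-tree node and phandle, and if_bynode()/if_byphandle() let other drivers resolve such a reference back to the ifnet; eport_attach() earlier in this diff is the registering side. A hedged sketch of a consumer follows, where the "ethernet" property name is invented for illustration and the lookup simply returns NULL until the referenced driver has attached.

#include <sys/types.h>
#include <dev/ofw/openfirm.h>
#include <dev/ofw/ofw_misc.h>

/* hypothetical: find the ifnet behind a phandle-valued property */
struct ifnet *
example_uplink_ifp(int node)
{
	uint32_t phandle;

	phandle = OF_getpropint(node, "ethernet", 0);	/* assumed property */
	if (phandle == 0)
		return (NULL);

	return (if_byphandle(phandle));
}
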
- */ - timeout_hardclock_update(); + LLTRACE_CPU(ci, lltrace_irqret, LLTRACE_IRQ_LOCAL_TIMER, 0); } /* Index: sys/kern/kern_exec.c =================================================================== RCS file: /cvs/src/sys/kern/kern_exec.c,v retrieving revision 1.230 diff -u -p -r1.230 kern_exec.c --- sys/kern/kern_exec.c 22 Feb 2022 17:14:14 -0000 1.230 +++ sys/kern/kern_exec.c 6 Jun 2022 06:05:24 -0000 @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -510,6 +511,8 @@ sys_execve(struct proc *p, void *v, regi memset(pr->ps_comm, 0, sizeof(pr->ps_comm)); strlcpy(pr->ps_comm, nid.ni_cnd.cn_nameptr, sizeof(pr->ps_comm)); pr->ps_acflag &= ~AFORK; + + LLTRACE(lltrace_pidname, p); /* record proc's vnode, for use by sysctl */ otvp = pr->ps_textvp; Index: sys/kern/kern_lock.c =================================================================== RCS file: /cvs/src/sys/kern/kern_lock.c,v retrieving revision 1.72 diff -u -p -r1.72 kern_lock.c --- sys/kern/kern_lock.c 26 Apr 2022 15:31:14 -0000 1.72 +++ sys/kern/kern_lock.c 6 Jun 2022 06:05:24 -0000 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -139,6 +140,8 @@ __mp_lock(struct __mp_lock *mpl) LOP_EXCLUSIVE | LOP_NEWORDER, NULL); #endif + LLTRACE(lltrace_klock, mpl, LLTRACE_LOCK_NOACQUIRE); + s = intr_disable(); if (cpu->mplc_depth++ == 0) cpu->mplc_ticket = atomic_inc_int_nv(&mpl->mpl_users); @@ -148,6 +151,8 @@ __mp_lock(struct __mp_lock *mpl) membar_enter_after_atomic(); WITNESS_LOCK(&mpl->mpl_lock_obj, LOP_EXCLUSIVE); + + LLTRACE(lltrace_klock, mpl, LLTRACE_LOCK_ACQUIRE); } void @@ -167,6 +172,7 @@ __mp_unlock(struct __mp_lock *mpl) s = intr_disable(); if (--cpu->mplc_depth == 0) { + LLTRACE(lltrace_klock, mpl, LLTRACE_LOCK_WAKEUP); membar_exit(); mpl->mpl_ticket++; } @@ -183,6 +189,8 @@ __mp_release_all(struct __mp_lock *mpl) int i; #endif + LLTRACE(lltrace_klock, mpl, LLTRACE_LOCK_WAKEUP); + s = intr_disable(); rv = cpu->mplc_depth; #ifdef WITNESS @@ -443,3 +451,45 @@ _mtx_init_flags(struct mutex *m, int ipl _mtx_init(m, ipl); } #endif /* WITNESS */ + +void +NET_LOCK(void) +{ + LLTRACE(lltrace_lock, &netlock, LLTRACE_LOCK_NOACQUIRE); + rw_enter_write(&netlock); + LLTRACE(lltrace_lock, &netlock, LLTRACE_LOCK_ACQUIRE); +} + +void +NET_UNLOCK(void) +{ + rw_exit_write(&netlock); + LLTRACE(lltrace_lock, &netlock, LLTRACE_LOCK_WAKEUP); +} + +void +NET_RLOCK_IN_SOFTNET(void) +{ + LLTRACE(lltrace_lock, &netlock, LLTRACE_LOCK_NOACQUIRE); + rw_enter_write(&netlock); + LLTRACE(lltrace_lock, &netlock, LLTRACE_LOCK_ACQUIRE); +} + +void +NET_RUNLOCK_IN_SOFTNET(void) +{ + rw_exit_write(&netlock); + LLTRACE(lltrace_lock, &netlock, LLTRACE_LOCK_WAKEUP); +} + +void +NET_RLOCK_IN_IOCTL(void) +{ + rw_enter_read(&netlock); +} + +void +NET_RUNLOCK_IN_IOCTL(void) +{ + rw_exit_read(&netlock); +} Index: sys/kern/kern_sched.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sched.c,v retrieving revision 1.74 diff -u -p -r1.74 kern_sched.c --- sys/kern/kern_sched.c 20 Jan 2022 11:06:57 -0000 1.74 +++ sys/kern/kern_sched.c 6 Jun 2022 06:05:24 -0000 @@ -187,6 +187,8 @@ sched_idle(void *v) wakeup(spc); } #endif + + LLTRACE(lltrace_idle); cpu_idle_cycle(); } cpu_idle_leave(); Index: sys/kern/kern_synch.c =================================================================== RCS file: /cvs/src/sys/kern/kern_synch.c,v retrieving revision 1.187 diff -u -p -r1.187 kern_synch.c --- sys/kern/kern_synch.c 13 May 2022 15:32:00 -0000 1.187 +++ sys/kern/kern_synch.c 6 Jun 2022 
06:05:24 -0000 @@ -37,6 +37,8 @@ * @(#)kern_synch.c 8.6 (Berkeley) 1/21/94 */ +#include "llt.h" + #include #include #include @@ -302,7 +304,7 @@ rwsleep(const volatile void *ident, stru struct sleep_state sls; int error, status; - KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0); + KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK | PLLTRACE)) == 0); KASSERT(ident != &nowake || ISSET(priority, PCATCH) || timo != 0); rw_assert_anylock(rwl); status = rw_status(rwl); @@ -310,11 +312,26 @@ rwsleep(const volatile void *ident, stru sleep_setup(&sls, ident, priority, wmesg, timo); rw_exit(rwl); + +#if NLLT > 0 + if (priority & PLLTRACE) + LLTRACE(lltrace_lock, rwl, LLTRACE_LOCK_WAKEUP); +#endif + /* signal may stop the process, release rwlock before that */ error = sleep_finish(&sls, 1); - if ((priority & PNORELOCK) == 0) + if ((priority & PNORELOCK) == 0) { +#if NLLT > 0 + if (priority & PLLTRACE) + LLTRACE(lltrace_lock, rwl, LLTRACE_LOCK_NOACQUIRE); +#endif rw_enter(rwl, status); +#if NLLT > 0 + if (priority & PLLTRACE) + LLTRACE(lltrace_lock, rwl, LLTRACE_LOCK_ACQUIRE); +#endif + } return error; } @@ -524,6 +541,7 @@ unsleep(struct proc *p) p->p_wchan = NULL; TRACEPOINT(sched, wakeup, p->p_tid + THREAD_PID_OFFSET, p->p_p->ps_pid); + LLTRACE(lltrace_runnable, p); } } Index: sys/kern/kern_tc.c =================================================================== RCS file: /cvs/src/sys/kern/kern_tc.c,v retrieving revision 1.75 diff -u -p -r1.75 kern_tc.c --- sys/kern/kern_tc.c 24 Oct 2021 00:02:25 -0000 1.75 +++ sys/kern/kern_tc.c 6 Jun 2022 06:05:24 -0000 @@ -140,6 +140,14 @@ tc_delta(struct timehands *th) tc->tc_counter_mask); } +unsigned int +countertime(void) +{ + struct timecounter *tc = timehands->th_counter; + + return (tc->tc_get_timecount(tc)); +} + /* * Functions for reading the time. We have to loop until we are sure that * the timehands that we operated on was not updated under our feet. 
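
countertime() above simply exposes the raw timecounter value. lltrace stores matched cycle/nanosecond pairs at the start and end of a trace in the per-CPU header added below (h_start_cy/h_start_ns and h_end_cy/h_end_ns), so a post-processor can place every event on the wall clock with a linear fit. The core arithmetic, pulled out as a standalone sketch of what cytime_init() in lltextract.c further down computes:

#include <stdint.h>

/* map a raw counter value onto nanoseconds using two calibration points */
static double
cycles_to_ns(uint64_t cy, uint64_t start_cy, uint64_t start_ns,
    uint64_t stop_cy, uint64_t stop_ns)
{
	double slope = (double)(stop_ns - start_ns) /
	    (double)(stop_cy - start_cy);	/* ns per counter tick */

	return ((double)start_ns + (double)(cy - start_cy) * slope);
}
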
See Index: sys/kern/kern_timeout.c =================================================================== RCS file: /cvs/src/sys/kern/kern_timeout.c,v retrieving revision 1.85 diff -u -p -r1.85 kern_timeout.c --- sys/kern/kern_timeout.c 19 Jun 2021 02:05:33 -0000 1.85 +++ sys/kern/kern_timeout.c 6 Jun 2022 06:05:24 -0000 @@ -35,6 +35,7 @@ #include /* _Q_INVALIDATE */ #include #include +#include #ifdef DDB #include @@ -738,6 +739,8 @@ softclock(void *arg) struct timeout *first_new, *to; int needsproc, new; + LLTRACE(lltrace_irq, LLTRACE_IRQ_BOTTOM_HALF, 0); + first_new = NULL; new = 0; @@ -761,6 +764,8 @@ softclock(void *arg) if (needsproc) wakeup(&timeout_proc); + + LLTRACE(lltrace_irqret, LLTRACE_IRQ_BOTTOM_HALF, 0); } void Index: sys/kern/sched_bsd.c =================================================================== RCS file: /cvs/src/sys/kern/sched_bsd.c,v retrieving revision 1.71 diff -u -p -r1.71 sched_bsd.c --- sys/kern/sched_bsd.c 10 May 2022 22:18:06 -0000 1.71 +++ sys/kern/sched_bsd.c 6 Jun 2022 06:05:24 -0000 @@ -54,7 +54,6 @@ #include #endif - int lbolt; /* once a second sleep address */ int rrticks_init; /* # of hardclock ticks per roundrobin() */ @@ -323,6 +322,8 @@ mi_switch(void) int sched_count; #endif + LLTRACE(lltrace_sched_enter); + assertwaitok(); KASSERT(p->p_stat != SONPROC); @@ -370,10 +371,13 @@ mi_switch(void) if (p != nextproc) { uvmexp.swtch++; + TRACEPOINT(sched, off__cpu, nextproc->p_tid + THREAD_PID_OFFSET, nextproc->p_p->ps_pid); cpu_switchto(p, nextproc); TRACEPOINT(sched, on__cpu, NULL); + + LLTRACE(lltrace_pidname, p); } else { TRACEPOINT(sched, remain__cpu, NULL); p->p_stat = SONPROC; @@ -394,6 +398,8 @@ mi_switch(void) #endif SCHED_ASSERT_UNLOCKED(); + + LLTRACE(lltrace_sched_leave); smr_idle(); Index: sys/kern/subr_pool.c =================================================================== RCS file: /cvs/src/sys/kern/subr_pool.c,v retrieving revision 1.235 diff -u -p -r1.235 subr_pool.c --- sys/kern/subr_pool.c 20 Jan 2022 11:06:57 -0000 1.235 +++ sys/kern/subr_pool.c 6 Jun 2022 06:05:24 -0000 @@ -2040,9 +2040,8 @@ pool_cache_gc(struct pool *pp) contention = pp->pr_cache_contention; delta = contention - pp->pr_cache_contention_prev; - if (delta > 8 /* magic */) { - if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems) - pp->pr_cache_items += 8; + if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems) { + pp->pr_cache_items += (delta > 8 /* magic */) ? 
8 : 1; } else if (delta == 0) { if (pp->pr_cache_items > 8) pp->pr_cache_items--; Index: sys/kern/uipc_socket2.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket2.c,v retrieving revision 1.122 diff -u -p -r1.122 uipc_socket2.c --- sys/kern/uipc_socket2.c 9 May 2022 14:49:55 -0000 1.122 +++ sys/kern/uipc_socket2.c 6 Jun 2022 06:05:24 -0000 @@ -343,7 +343,7 @@ sosleep_nsec(struct socket *so, void *id switch (so->so_proto->pr_domain->dom_family) { case PF_INET: case PF_INET6: - ret = rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs); + ret = rwsleep_nsec(ident, &netlock, PLLTRACE|prio, wmesg, nsecs); break; case PF_UNIX: ret = rwsleep_nsec(ident, &unp_lock, prio, wmesg, nsecs); Index: sys/net/ethertypes.h =================================================================== RCS file: /cvs/src/sys/net/ethertypes.h,v retrieving revision 1.16 diff -u -p -r1.16 ethertypes.h --- sys/net/ethertypes.h 5 Jan 2022 05:19:22 -0000 1.16 +++ sys/net/ethertypes.h 6 Jun 2022 06:05:24 -0000 @@ -303,6 +303,8 @@ #define ETHERTYPE_AOE 0x88A2 /* ATA over Ethernet */ #define ETHERTYPE_QINQ 0x88A8 /* 802.1ad VLAN stacking */ #define ETHERTYPE_LLDP 0x88CC /* Link Layer Discovery Protocol */ +#define ETHERTYPE_802_EX1 0x88B5 /* IEEE Std 802 - Local Experimental */ +#define ETHERTYPE_802_EX2 0x88B6 /* IEEE Std 802 - Local Experimental */ #define ETHERTYPE_MACSEC 0x88e5 /* 802.1AE MACsec */ #define ETHERTYPE_PBB 0x88e7 /* 802.1Q Provider Backbone Bridging */ #define ETHERTYPE_NSH 0x984F /* Network Service Header (RFC8300) */ Index: sys/net/ifq.c =================================================================== RCS file: /cvs/src/sys/net/ifq.c,v retrieving revision 1.46 diff -u -p -r1.46 ifq.c --- sys/net/ifq.c 30 Apr 2022 21:13:57 -0000 1.46 +++ sys/net/ifq.c 6 Jun 2022 06:05:24 -0000 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -144,6 +145,7 @@ ifq_start_task(void *p) ifq_empty(ifq) || ifq_is_oactive(ifq)) return; + LLTRACE(lltrace_pkts, LLTRACE_PKTS_T_IFQ, ifq_len(ifq)); ifp->if_qstart(ifq); } @@ -154,6 +156,7 @@ ifq_restart_task(void *p) struct ifnet *ifp = ifq->ifq_if; ifq_clr_oactive(ifq); + LLTRACE(lltrace_pkts, LLTRACE_PKTS_T_IFQ, ifq_len(ifq)); ifp->if_qstart(ifq); } @@ -688,6 +691,8 @@ ifiq_input(struct ifiqueue *ifiq, struct } packets = ml_len(ml); + LLTRACE(lltrace_pkts, LLTRACE_PKTS_T_IFIQ, packets); + #if NBPFILTER > 0 if_bpf = ifp->if_bpf; if (if_bpf) { @@ -728,8 +733,10 @@ ifiq_input(struct ifiqueue *ifiq, struct if (ml_empty(ml)) task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task); - else + else { + LLTRACE(lltrace_pkts, LLTRACE_PKTS_T_QDROP, ml_len(ml)); ml_purge(ml); + } return (len > ifiq_maxlen_return); } @@ -769,6 +776,8 @@ ifiq_process(void *arg) ml = ifiq->ifiq_ml; ml_init(&ifiq->ifiq_ml); mtx_leave(&ifiq->ifiq_mtx); + + LLTRACE(lltrace_pkts, LLTRACE_PKTS_T_NETTQ, ml_len(&ml)); if_input_process(ifiq->ifiq_if, &ml); } Index: sys/sys/conf.h =================================================================== RCS file: /cvs/src/sys/sys/conf.h,v retrieving revision 1.156 diff -u -p -r1.156 conf.h --- sys/sys/conf.h 23 Jan 2021 05:08:36 -0000 1.156 +++ sys/sys/conf.h 6 Jun 2022 06:05:24 -0000 @@ -335,6 +335,13 @@ extern struct cdevsw cdevsw[]; (dev_type_stop((*))) enodev, 0, selfalse, \ (dev_type_mmap((*))) enodev } +/* open, close, read, ioctl, poll, kqfilter */ +#define cdev_lltrace_init(c,n) { \ + dev_init(c,n,open), dev_init(c,n,close), dev_init(c,n,read), \ + (dev_type_write((*))) enodev, dev_init(c,n,ioctl), \ + 
(dev_type_stop((*))) enodev, 0, selfalse, \ + (dev_type_mmap((*))) enodev, 0, 0, dev_init(c,n,kqfilter) } + /* open, close, read, write, ioctl, stop, tty, poll, mmap, kqfilter */ #define cdev_wsdisplay_init(c,n) { \ dev_init(c,n,open), dev_init(c,n,close), dev_init(c,n,read), \ @@ -620,6 +627,7 @@ cdev_decl(wsmux); cdev_decl(ksyms); cdev_decl(kstat); +cdev_decl(lltrace); cdev_decl(bio); cdev_decl(vscsi); Index: sys/sys/lltrace.h =================================================================== RCS file: sys/sys/lltrace.h diff -N sys/sys/lltrace.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/sys/lltrace.h 6 Jun 2022 06:05:24 -0000 @@ -0,0 +1,316 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2022 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _SYS_LLTRACE_H_ +#define _SYS_LLTRACE_H_ + +/* + * lltrace is heavily based KUTrace (kernel/userland tracing) by + * Richard L. Sites. + */ + +#define LLTRACE_NSLOTS 8192 + +struct lltrace_buffer { + uint64_t llt_slots[LLTRACE_NSLOTS]; +}; + +#define LLTIOCSTART _IO('t',128) +#define LLTIOCSTOP _IO('t',129) +#define LLTIOCFLUSH _IO('t',130) + +/* + * trace until all the buffers are used, or trace and reuse buffers. + */ +#define LLTRACE_MODE_HEAD 0 +#define LLTRACE_MODE_TAIL 1 +#define LLTRACE_MODE_COUNT 2 + +#define LLTIOCSMODE _IOW('t', 131, unsigned int) +#define LLTIOCGMODE _IOR('t', 131, unsigned int) + +/* + * how much memory in MB to allocate for lltrace_buffer structs + * during tracing. + */ + +#define LLTRACE_BLEN_MIN 1 +#define LLTRACE_BLEN_MAX 128 + +#define LLTIOCSBLEN _IOW('t', 132, unsigned int) +#define LLTIOCGBLEN _IOR('t', 132, unsigned int) + +/* + * lltrace collects kernel events in per-CPU buffers. + */ + +/* + * The first 8 words of the per-CPU buffer are dedicated to metadata + * about the CPU and the period of time over which events were + * collected. + */ + +struct lltrace_header { + /* slots[0] */ + uint64_t h_cpu; + + /* slots[1] */ + uint64_t h_boottime; + + /* slots[2] */ + uint64_t h_start_cy; + /* slots[3] */ + uint64_t h_start_ns; + /* slots[4] */ + uint64_t h_end_cy; + /* slots[5] */ + uint64_t h_end_ns; + + /* slots[6] */ + uint32_t h_idletid; + uint32_t h_tid; + /* slots[7] */ + uint64_t h_zero; +}; + +/* + * The high 32-bits of the trace entry contain a timestamp and event id. + */ + +#define LLTRACE_TIMESTAMP_SHIFT 44 +#define LLTRACE_TIMESTAMP_BITS 20 +#define LLTRACE_TIMESTAMP_MASK ((1ULL << LLTRACE_TIMESTAMP_BITS) - 1) + +#define LLTRACE_EVENT_SHIFT 32 +#define LLTRACE_EVENT_BITS 12 +#define LLTRACE_EVENT_MASK ((1ULL << LLTRACE_EVENT_BITS) - 1) + +/* + * The low 32-bits vary depending on the event id. 
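
To make the layout described in the comments above concrete: each slot is a single uint64_t carrying a 20-bit truncated timestamp in the top bits, a 12-bit event id beneath it, and an event-specific 32-bit payload in the low half. A minimal standalone pack/unpack example, with the shift/mask values repeated so it compiles on its own (they mirror the LLTRACE_TIMESTAMP_*/LLTRACE_EVENT_* macros above and the LLTRACE_ARG32_* ones that follow):

#include <stdint.h>
#include <stdio.h>

#define TS_SHIFT	44
#define TS_MASK		((1ULL << 20) - 1)
#define EV_SHIFT	32
#define EV_MASK		((1ULL << 12) - 1)
#define ARG32_MASK	((1ULL << 32) - 1)

static uint64_t
slot_pack(uint64_t ts, uint64_t event, uint64_t arg32)
{
	return ((ts & TS_MASK) << TS_SHIFT) |
	    ((event & EV_MASK) << EV_SHIFT) |
	    (arg32 & ARG32_MASK);
}

int
main(void)
{
	uint64_t v = slot_pack(0xabcde, 0x123, 1234);	/* arbitrary values */

	printf("ts %llx ev %llx arg %llu\n",
	    (unsigned long long)((v >> TS_SHIFT) & TS_MASK),
	    (unsigned long long)((v >> EV_SHIFT) & EV_MASK),
	    (unsigned long long)(v & ARG32_MASK));
	return (0);
}
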
+ */ + +/* full 32 bits are used */ +#define LLTRACE_ARG32_SHIFT 0 +#define LLTRACE_ARG32_BITS 32 +#define LLTRACE_ARG32_MASK ((1ULL << LLTRACE_ARG32_BITS) - 1) + +/* layout for syscalls/traps/irqs */ +#define LLTRACE_ARG0_SHIFT 0 +#define LLTRACE_ARG0_BITS 16 +#define LLTRACE_ARG0_MASK ((1ULL << LLTRACE_ARG0_BITS) - 1) + +#define LLTRACE_RETVAL_SHIFT 16 +#define LLTRACE_RETVAL_BITS 8 +#define LLTRACE_RETVAL_MASK ((1ULL << LLTRACE_RETVAL_BITS) - 1) + +#define LLTRACE_DUR_SHIFT 24 +#define LLTRACE_DUR_BITS 8 +#define LLTRACE_DUR_MASK ((1ULL << LLTRACE_DUR_BITS) - 1) + +/* + * lltrace event types + */ + +/* + * the high 3 bits of the event id defines how the rest of the bits are used. + */ + +#define LLTRACE_EVENT_T_MASK (0x7ULL << 9) +#define LLTRACE_EVENT_T_VARLEN (0x0ULL << 9) +#define LLTRACE_EVENT_T_MARK (0x1ULL << 9) +#define LLTRACE_EVENT_T_IRQ (0x2ULL << 9) +#define LLTRACE_EVENT_T_SYSCALL (0x4ULL << 9) +#define LLTRACE_EVENT_T_SYSRET (0x5ULL << 9) + +/* + * variable len events use extra slots on the ring. + */ + +#define LLTRACE_EVENT_VARLEN_MASK (0x00fULL) /* low 4bits are the len */ + +#define LLTRACE_EVENT_PID (LLTRACE_EVENT_T_VARLEN | 0x10) +#define LLTRACE_EVENT_PID_ARG_KTHREAD (1U << 31) +#define LLTRACE_EVENT_LOCKNAME (LLTRACE_EVENT_T_VARLEN | 0x70) + +/* hardcode the space used by PC entries */ +#define LLTRACE_EVENT_PC_K (LLTRACE_EVENT_T_VARLEN | 0x80) +#define LLTRACE_EVENT_PC_U (LLTRACE_EVENT_T_VARLEN | 0x90) + +/* + * mark a particular event occuring + */ + +#define LLTRACE_EVENT_IDLE (LLTRACE_EVENT_T_MARK | 0x0) + +#define LLTRACE_EVENT_RUNNABLE (LLTRACE_EVENT_T_MARK | 0x1) + /* arg32 is tid */ + +#define LLTRACE_EVENT_TRAP (LLTRACE_EVENT_T_MARK | 0x2) +#define LLTRACE_EVENT_TRAPRET (LLTRACE_EVENT_T_MARK | 0x3) + /* arg32 is trap id */ +#define LLTRACE_TRAP_PAGEFAULT 14 /* as per kutrace */ + +#define LLTRACE_EVENT_SCHED (LLTRACE_EVENT_T_MARK | 0x4) +#define LLTRACE_EVENT_SCHEDRET (LLTRACE_EVENT_T_MARK | 0x5) + +#define LLTRACE_EVENT_IPI (LLTRACE_EVENT_T_MARK | 0x6) + /* arg32 is cpu */ + +#define LLTRACE_EVENT_PKTS (LLTRACE_EVENT_T_MARK | 0x7) +#define LLTRACE_PKTS_T_SHIFT 28 +#define LLTRACE_PKTS_T_MASK (0xf << LLTRACE_PKTS_T_SHIFT) +#define LLTRACE_PKTS_T_IFIQ (0x0 << LLTRACE_PKTS_T_SHIFT) +#define LLTRACE_PKTS_T_NETTQ (0x1 << LLTRACE_PKTS_T_SHIFT) +#define LLTRACE_PKTS_T_IFQ (0x2 << LLTRACE_PKTS_T_SHIFT) +#define LLTRACE_PKTS_T_QDROP (0x3 << LLTRACE_PKTS_T_SHIFT) +#define LLTRACE_PKTS_T_HDROP (0x4 << LLTRACE_PKTS_T_SHIFT) +#define LLTRACE_PKTS_V_SHIFT 0 +#define LLTRACE_PKTS_V_MASK (0xffff << LLTRACE_PKTS_V_SHIFT) + +#define LLTRACE_EVENT_LOCK(_t) (LLTRACE_EVENT_T_MARK | 0x10 | (_t)) +#define LLTRACE_LOCK_NOACQUIRE (0x00) +#define LLTRACE_LOCK_ACQUIRE (0x01) +#define LLTRACE_LOCK_WAKEUP (0x02) + +#define LLTRACE_EVENT_KFUNC_ENTER (LLTRACE_EVENT_T_MARK | 0xf0) +#define LLTRACE_EVENT_KFUNC_LEAVE (LLTRACE_EVENT_T_MARK | 0xf1) +#define LLTRACE_EVENT_MARK (LLTRACE_EVENT_T_MARK | 0xff) + +/* + * irqs + */ + +#define LLTRACE_EVENT_IRQ(_c) (LLTRACE_EVENT_T_IRQ | 0x000 | (_c)) +#define LLTRACE_EVENT_IRQRET(_c) (LLTRACE_EVENT_T_IRQ | 0x100 | (_c)) + +#define LLTRACE_IRQ_LOCAL_TIMER (0xecULL) /* like linux */ +#define LLTRACE_IRQ_IPI (0xfdULL) /* like linux */ + +#define LLTRACE_IRQ_BOTTOM_HALF (0xffULL) /* like kutrace */ + +/* + * syscalls and returns from syscalls + */ + +#define LLTRACE_SYSCALL_MASK(_c) ((uint64_t)(_c) & 0x1ff) + +#define LLTRACE_EVENT_SYSCALL(_c) \ + (LLTRACE_EVENT_T_SYSCALL | LLTRACE_SYSCALL_MASK(_c)) +#define LLTRACE_EVENT_SYSRET(_c) \ + 
(LLTRACE_EVENT_T_SYSRET | LLTRACE_SYSCALL_MASK(_c)) + +/* + * KUTrace event types for compatibility + */ + +#define KUTRACE_FILENAME (0x001ULL) +#define KUTRACE_PIDNAME (0x002ULL) +#define KUTRACE_METHODNAME (0x003ULL) +#define KUTRACE_TRAPNAME (0x004ULL) +#define KUTRACE_LOCKNAME (0x007ULL) + +#define KUTRACE_USERPID (0x200ULL) +#define KUTRACE_RUNNABLE (0x206ULL) +#define KUTRACE_IPI (0x207ULL) +#define KUTRACE_MWAIT (0x208ULL) +#define KUTRACE_PSTATE (0x209ULL) + +#define KUTRACE_MARKA (0x20aULL) +#define KUTRACE_MARKB (0x20bULL) +#define KUTRACE_MARKC (0x20cULL) +#define KUTRACE_MARKD (0x20dULL) + +#define KUTRACE_LOCKNOACQUIRE (0x210ULL) +#define KUTRACE_LOCKACQUIRE (0x211ULL) +#define KUTRACE_LOCKWAKEUP (0x212ULL) + +#define KUTRACE_PC_U (0x280ULL) +#define KUTRACE_PC_K (0x281ULL) + +/* these are in blocks of 256 */ +#define KUTRACE_TRAP (0x400ULL) +#define KUTRACE_IRQ (0x500ULL) +#define KUTRACE_TRAPRET (0x600ULL) +#define KUTRACE_IRQRET (0x700ULL) + +#define KUTRACE_LOCAL_TIMER_VECTOR (0xec) + +/* these are in blocks of 512 */ +#define KUTRACE_SYSCALL_MASK(_c) ((uint64_t)(_c) & 0x1ff) +#define KUTRACE_SYSCALL_SCHED 511 + +#define KUTRACE_SYSCALL(_c) (0x800ULL | KUTRACE_SYSCALL_MASK(_c)) +#define KUTRACE_SYSRET(_c) (0xa00ULL | KUTRACE_SYSCALL_MASK(_c)) + +/* Specific trap number for page fault */ +#define KUTRACE_PAGEFAULT 14 + +#ifdef _KERNEL + +struct lltrace_cpu; + +static inline struct lltrace_cpu * +lltrace_enter_spc(struct schedstate_percpu *spc) +{ + return (READ_ONCE(spc->spc_lltrace)); +} + +static inline struct lltrace_cpu * +lltrace_enter_cpu(struct cpu_info *ci) +{ + return lltrace_enter_spc(&ci->ci_schedstate); +} + +static inline struct lltrace_cpu * +lltrace_enter(void) +{ + return lltrace_enter_cpu(curcpu()); +} + +void lltrace_idle(struct lltrace_cpu *); +void lltrace_statclock(struct lltrace_cpu *, int, unsigned long); + +void lltrace_syscall(struct lltrace_cpu *, register_t, + size_t, const register_t *); +void lltrace_sysret(struct lltrace_cpu *, register_t, + int, const register_t [2]); +void lltrace_pidname(struct lltrace_cpu *, struct proc *); +void lltrace_sched_enter(struct lltrace_cpu *); +void lltrace_sched_leave(struct lltrace_cpu *); +void lltrace_runnable(struct lltrace_cpu *, struct proc *); + +void lltrace_trap(struct lltrace_cpu *, unsigned int); +void lltrace_trapret(struct lltrace_cpu *, unsigned int); + +void lltrace_lock(struct lltrace_cpu *, void *, unsigned int); +void lltrace_klock(struct lltrace_cpu *, void *, unsigned int); + +void lltrace_pkts(struct lltrace_cpu *, unsigned int, unsigned int); +void lltrace_mark(struct lltrace_cpu *); + +/* MD bits */ + +void lltrace_ipi(struct lltrace_cpu *, unsigned int); +#define lltrace_ipi_bcast(_llt) lltrace_ipi((_llt), ~0U); + +void lltrace_irq(struct lltrace_cpu *, unsigned int, unsigned int); +void lltrace_irqret(struct lltrace_cpu *, unsigned int, unsigned int); + +#endif /* _KERNEL */ + +#endif /* _SYS_LLTRACE_H_ */ Index: sys/sys/param.h =================================================================== RCS file: /cvs/src/sys/sys/param.h,v retrieving revision 1.138 diff -u -p -r1.138 param.h --- sys/sys/param.h 4 Apr 2022 21:16:47 -0000 1.138 +++ sys/sys/param.h 6 Jun 2022 06:05:24 -0000 @@ -111,6 +111,8 @@ #define PCATCH 0x100 /* OR'd with pri for tsleep to check signals */ #define PNORELOCK 0x200 /* OR'd with pri for msleep to not reacquire the mutex */ +#define PLLTRACE 0x400 + #endif /* _KERNEL */ #define NODEV (dev_t)(-1) /* non-existent device */ Index: sys/sys/pool.h 
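
With the lltrace header above in place, an instrumentation site boils down to: fetch the per-CPU lltrace_cpu pointer, which is NULL whenever tracing is not running, and only then call one of the lltrace_*() emitters. This is the pattern the LLTRACE()/LLTRACE_CPU() macros added to sys/sys/tracepoint.h later in this diff wrap up; a kernel-context sketch of the open-coded form, with lltrace_mark() chosen purely as an example event:

#include <sys/param.h>
#include <sys/sched.h>
#include <machine/cpu.h>
#include <sys/lltrace.h>

void
example_instrumented_path(void)
{
	struct lltrace_cpu *llt;

	llt = lltrace_enter();		/* per-CPU ring, NULL if tracing is off */
	if (llt != NULL)
		lltrace_mark(llt);	/* records one 8-byte slot */

	/* ... the work actually being traced ... */
}
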
=================================================================== RCS file: /cvs/src/sys/sys/pool.h,v retrieving revision 1.78 diff -u -p -r1.78 pool.h --- sys/sys/pool.h 2 Jan 2021 03:23:59 -0000 1.78 +++ sys/sys/pool.h 6 Jun 2022 06:05:24 -0000 @@ -150,8 +150,6 @@ union pool_lock { struct pool { union pool_lock pr_lock; - const struct pool_lock_ops * - pr_lock_ops; SIMPLEQ_ENTRY(pool) pr_poollist; struct pool_pagelist @@ -186,15 +184,18 @@ struct pool { #define PR_RWLOCK 0x0010 #define PR_WANTED 0x0100 - int pr_flags; - int pr_ipl; RBT_HEAD(phtree, pool_page_header) pr_phtree; - struct cpumem * pr_cache; + struct cpumem * pr_cache __aligned(512); unsigned long pr_cache_magic[2]; - union pool_lock pr_cache_lock; + int pr_flags; + int pr_ipl; + const struct pool_lock_ops * + pr_lock_ops; + + union pool_lock pr_cache_lock __aligned(512); struct pool_cache_lists pr_cache_lists; /* list of idle item lists */ u_int pr_cache_nitems; /* # of idle items */ @@ -205,7 +206,7 @@ struct pool { uint64_t pr_cache_ngc; /* # of times the gc released a list */ int pr_cache_nout; - u_int pr_align; + u_int pr_align __aligned(512); u_int pr_maxcolors; /* Cache coloring */ int pr_phoffset; /* Offset in page of page header */ @@ -239,6 +240,7 @@ struct pool { /* Physical memory configuration. */ const struct kmem_pa_mode * pr_crange; + }; #endif /* _KERNEL || _LIBKVM */ Index: sys/sys/sched.h =================================================================== RCS file: /cvs/src/sys/sys/sched.h,v retrieving revision 1.57 diff -u -p -r1.57 sched.h --- sys/sys/sched.h 25 Dec 2020 12:49:31 -0000 1.57 +++ sys/sys/sched.h 6 Jun 2022 06:05:24 -0000 @@ -91,11 +91,13 @@ #define SCHED_NQS 32 /* 32 run queues. */ struct smr_entry; +struct lltrace_cpu; /* * Per-CPU scheduler state. */ struct schedstate_percpu { + struct lltrace_cpu *spc_lltrace; struct proc *spc_idleproc; /* idle proc for this cpu */ TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS]; LIST_HEAD(,proc) spc_deadproc; Index: sys/sys/syscall_mi.h =================================================================== RCS file: /cvs/src/sys/sys/syscall_mi.h,v retrieving revision 1.25 diff -u -p -r1.25 syscall_mi.h --- sys/sys/syscall_mi.h 21 Jan 2020 16:16:23 -0000 1.25 +++ sys/sys/syscall_mi.h 6 Jun 2022 06:05:24 -0000 @@ -45,7 +45,6 @@ #include #endif - /* * The MD setup for a system call has been done; here's the MI part. */ @@ -76,6 +75,7 @@ mi_syscall(struct proc *p, register_t co KERNEL_UNLOCK(); } #endif + LLTRACE_CPU(p->p_cpu, lltrace_syscall, code, callp->sy_argsize, argp); /* SP must be within MAP_STACK space */ if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p), @@ -113,6 +113,7 @@ static inline void mi_syscall_return(struct proc *p, register_t code, int error, const register_t retval[2]) { + LLTRACE_CPU(p->p_cpu, lltrace_sysret, code, error, retval); #ifdef SYSCALL_DEBUG KERNEL_LOCK(); scdebug_ret(p, code, error, retval); @@ -140,12 +141,13 @@ mi_syscall_return(struct proc *p, regist static inline void mi_child_return(struct proc *p) { -#if defined(SYSCALL_DEBUG) || defined(KTRACE) || NDT > 0 +#if defined(SYSCALL_DEBUG) || defined(KTRACE) || NDT > 0 || NLLT > 0 int code = (p->p_flag & P_THREAD) ? SYS___tfork : (p->p_p->ps_flags & PS_PPWAIT) ? 
SYS_vfork : SYS_fork; const register_t child_retval[2] = { 0, 1 }; #endif + LLTRACE_CPU(p->p_cpu, lltrace_sysret, code, 0, child_retval); TRACEPOINT(sched, on__cpu, NULL); #ifdef SYSCALL_DEBUG Index: sys/sys/systm.h =================================================================== RCS file: /cvs/src/sys/sys/systm.h,v retrieving revision 1.155 diff -u -p -r1.155 systm.h --- sys/sys/systm.h 9 Dec 2021 00:26:10 -0000 1.155 +++ sys/sys/systm.h 6 Jun 2022 06:05:24 -0000 @@ -322,8 +322,8 @@ extern struct rwlock netlock; * by the NET_LOCK(). It's a single non-recursive lock for the whole * subsystem. */ -#define NET_LOCK() do { rw_enter_write(&netlock); } while (0) -#define NET_UNLOCK() do { rw_exit_write(&netlock); } while (0) +void NET_LOCK(void); +void NET_UNLOCK(void); /* * Reader version of NET_LOCK() to be used in "softnet" thread only. @@ -332,8 +332,8 @@ extern struct rwlock netlock; * without holding an exclusive lock. This is done to allow read-only * ioctl(2) to not block. */ -#define NET_RLOCK_IN_SOFTNET() do { rw_enter_read(&netlock); } while (0) -#define NET_RUNLOCK_IN_SOFTNET()do { rw_exit_read(&netlock); } while (0) +void NET_RLOCK_IN_SOFTNET(void); +void NET_RUNLOCK_IN_SOFTNET(void); /* * Reader version of NET_LOCK() to be used in ioctl/sysctl path only. @@ -341,8 +341,8 @@ extern struct rwlock netlock; * Can be grabbed instead of the exclusive version when no field * protected by the NET_LOCK() is modified by the ioctl/sysctl. */ -#define NET_RLOCK_IN_IOCTL() do { rw_enter_read(&netlock); } while (0) -#define NET_RUNLOCK_IN_IOCTL() do { rw_exit_read(&netlock); } while (0) +void NET_RLOCK_IN_IOCTL(void); +void NET_RUNLOCK_IN_IOCTL(void); #ifdef DIAGNOSTIC Index: sys/sys/time.h =================================================================== RCS file: /cvs/src/sys/sys/time.h,v retrieving revision 1.61 diff -u -p -r1.61 time.h --- sys/sys/time.h 19 Jun 2021 13:49:39 -0000 1.61 +++ sys/sys/time.h 6 Jun 2022 06:05:24 -0000 @@ -313,6 +313,8 @@ time_t getuptime(void); uint64_t nsecuptime(void); uint64_t getnsecuptime(void); +unsigned int countertime(void); + struct proc; int clock_gettime(struct proc *, clockid_t, struct timespec *); Index: sys/sys/tracepoint.h =================================================================== RCS file: /cvs/src/sys/sys/tracepoint.h,v retrieving revision 1.1 diff -u -p -r1.1 tracepoint.h --- sys/sys/tracepoint.h 21 Jan 2020 16:16:23 -0000 1.1 +++ sys/sys/tracepoint.h 6 Jun 2022 06:05:24 -0000 @@ -32,5 +32,33 @@ #define TRACEPOINT(func, name, args...) #endif /* NDT > 0 */ + +#include "llt.h" +#if NLLT > 0 +#include + +#define LLTRACE_SPC(_spc, _fn, ...) { \ + struct lltrace_cpu *_llt = lltrace_enter_spc((_spc)); \ + if (_llt != NULL) \ + (_fn)(_llt __VA_OPT__(,) __VA_ARGS__); \ +} while (0) + +#define LLTRACE_CPU(_ci, _fn, ...) { \ + struct lltrace_cpu *_llt = lltrace_enter_cpu((_ci)); \ + if (_llt != NULL) \ + (_fn)(_llt __VA_OPT__(,) __VA_ARGS__); \ +} while (0) + +#define LLTRACE(_fn, ...) { \ + struct lltrace_cpu *_llt = lltrace_enter(); \ + if (_llt != NULL) \ + (_fn)(_llt __VA_OPT__(,) __VA_ARGS__); \ +} while (0) + +#else /* NLLT > 0 */ + +#define LLTRACE(_fn, ...) 
+ +#endif /* NLLT > 0 */ #endif /* _KERNEL */ #endif /* _SYS_TRACEPOINT_H_ */ Index: sys/uvm/uvm_fault.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_fault.c,v retrieving revision 1.129 diff -u -p -r1.129 uvm_fault.c --- sys/uvm/uvm_fault.c 4 Apr 2022 09:27:05 -0000 1.129 +++ sys/uvm/uvm_fault.c 6 Jun 2022 06:05:24 -0000 @@ -578,6 +578,8 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad struct vm_page *pages[UVM_MAXRANGE]; int error; + LLTRACE(lltrace_trap, LLTRACE_TRAP_PAGEFAULT); + counters_inc(uvmexp_counters, faults); TRACEPOINT(uvm, fault, vaddr, fault_type, access_type, NULL); @@ -641,6 +643,8 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad } } } + + LLTRACE(lltrace_trapret, LLTRACE_TRAP_PAGEFAULT); return error; } Index: usr.bin/lltextract/Makefile =================================================================== RCS file: usr.bin/lltextract/Makefile diff -N usr.bin/lltextract/Makefile --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.bin/lltextract/Makefile 6 Jun 2022 06:05:25 -0000 @@ -0,0 +1,11 @@ +PROG= lltextract +SRCS= lltextract.c syscallnames.c names.c +MAN= + +SYS_DIR= ${.CURDIR}/../../sys +CFLAGS+= -I${SYS_DIR} + +DEBUG= -g +WARNINGS= Yes + +.include Index: usr.bin/lltextract/lltextract.c =================================================================== RCS file: usr.bin/lltextract/lltextract.c diff -N usr.bin/lltextract/lltextract.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.bin/lltextract/lltextract.c 6 Jun 2022 06:05:25 -0000 @@ -0,0 +1,811 @@ +/* $OpenBSD */ + +/* + * Copyright (c) 2022 The University of Queensland + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * + * Copyright 2021 Richard L. Sites + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include /* for SYS_MAXSYSCALL */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "lltextract.h" + +#ifndef nitems +#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0])) +#endif + +#define kRawVersionNumber 3 + +struct cytime { + uint64_t base_cy; + uint64_t base_ns; + uint64_t base_cy10; + uint64_t base_ns10; + + double slope; + double slope_ns10; +}; + +struct llthread { + pid_t llt_tid; +}; + +struct llevent { + size_t slot; + + uint64_t ns; + int64_t cy; + uint32_t cy32; + + unsigned int event; + uint32_t arg0; + char *name; + pid_t tid; +}; + +struct ring { + uint64_t slots[8192]; +}; + +static void lltextract(size_t, const struct ring *); + +__dead static void +usage(void) +{ + extern char *__progname; + + fprintf(stderr, "usage: %s [-v] [-i infile] [-o outfile]\n", + __progname); + + exit(1); +} + +const char * const ifname_def = "stdin"; +const char * const ofname_def = "stdout"; + +FILE *ifile = stdin; +FILE *ofile = stdout; +int verbose = 0; + +int +main(int argc, char *argv[]) +{ + const char *ifname = ifname_def; + const char *ofname = ofname_def; + struct ring ring; + size_t block = 1; + + int ch; + + while ((ch = getopt(argc, argv, "i:o:v")) != -1) { + switch (ch) { + case 'i': + ifname = optarg; + break; + case 'o': + ofname = optarg; + break; + case 'v': + verbose++; + break; + default: + usage(); + /* NOTREACHED */ + } + } + + argc -= optind; + argv += optind; + + if (argc != 0) + usage(); + + if (ifname != ifname_def) { + ifile = fopen(ifname, "r"); + if (ifile == NULL) + err(1, "%s", ifname); + } + + if (ofname != ofname_def) { + ofile = fopen(ofname, "w"); + if (ofile == NULL) + err(1, "%s", ofname); + } + + fprintf(ofile, "# ## VERSION: %d\n", kRawVersionNumber); + + for (;;) { + size_t nread = fread(&ring, sizeof(ring), 1, ifile); + if (nread == 0) { + if (ferror(ifile)) + errx(1, "error reading %s", ifname); + if (feof(ifile)) + break; + } + + lltextract(block++, &ring); + } + + return (0); +} + +static int +printable(int ch) +{ + if (ch == '\0') + return ('_'); + if (!isprint(ch)) + return ('~'); + + return (ch); +} + +static void +dump_slot(size_t slot, uint64_t v) +{ + uint8_t buf[sizeof(v)]; + size_t i; + + printf("## slot %4zu = 0x%016llx |", slot, v); + + memcpy(buf, &v, sizeof(buf)); + for (i = 0; i < sizeof(buf); i++) + putchar(printable(buf[i])); + + printf("|\n"); +} + +static void +dump_slots(const struct ring *ring, size_t slot, size_t n) +{ + n += slot; + while (slot < n) { + dump_slot(slot, ring->slots[slot]); + slot++; + } +} + +static void +cytime_init(struct cytime *ct, + uint64_t start_cy, uint64_t start_ns, uint64_t stop_cy, uint64_t stop_ns) +{ + uint64_t diff_cy = stop_cy - start_cy; + uint64_t diff_ns = stop_ns - start_ns; + + ct->base_cy = start_cy; + ct->base_ns = start_ns; + + ct->slope = (double)diff_ns / (double)diff_cy; + ct->slope_ns10 = ct->slope / 10.0; + + if (verbose >= 1) { + printf("SetParams maps %18llucy ==> %18lluns\n", 
+ start_cy, start_ns); + printf("SetParams maps %18llucy ==> %18lluns\n", + stop_cy, stop_ns); + printf(" diff %18llucy ==> %18lluns\n", + diff_cy, diff_ns); + printf("SetParams slope %f ns/cy\n", ct->slope); + } +} + +struct lltstate { + struct cytime ct; + + uint32_t cy32; + int64_t cy; + unsigned int cpu; + pid_t idletid; + pid_t tid; +}; + +static void lltextract_varlen(struct lltstate *, struct llevent *, + unsigned int, uint64_t, const uint64_t *, unsigned int); +static void lltextract_mark(struct lltstate *, struct llevent *, + unsigned int, uint64_t); +static void lltextract_irq(struct lltstate *, struct llevent *, + unsigned int, uint64_t); +static void lltextract_syscall(struct lltstate *, struct llevent *, + unsigned int, uint64_t); +static void lltextract_sysret(struct lltstate *, struct llevent *, + unsigned int, uint64_t); + +static struct llevent llevents[8192]; + +#define TS32_SHIFT (32 - LLTRACE_TIMESTAMP_BITS) + +static void +lltextract(size_t block, const struct ring *ring) +{ + const struct lltrace_header *llh = (struct lltrace_header *)ring; + struct lltstate state = { + .cpu = llh->h_cpu, + .idletid = llh->h_idletid, + + .cy = 0, + }; + time_t boottime; + struct tm *tm; + + size_t slot; + uint32_t cy32; + int32_t cydiff; + + unsigned int ev, len; + struct llevent *lle = llevents; + + if (verbose >= 2) + dump_slots(ring, 0, 8); + + cytime_init(&state.ct, ring->slots[2], ring->slots[3], + ring->slots[4], ring->slots[5]); + + //state.cy = llh->h_start_cy; + state.cy32 = ring->slots[2] << TS32_SHIFT; + + boottime = ring->slots[1] / 1000000000; + tm = localtime(&boottime); + + fprintf(ofile, "# [%zu] %04d-%02d-%02d_%02d:%02d:%02d.%06lld\n", block, + tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, + tm->tm_hour, tm->tm_min, tm->tm_sec, + (boottime / 1000) % 1000000); + + fprintf(ofile, "# TS DUR EVENT CPU PID RPC ARG0 RETVAL IPC NAME " + "(t and dur multiples of 10ns)\n"); + + state.tid = (llh->h_idletid == llh->h_tid) ? 
0 : (llh->h_tid & 0xffff); + + for (slot = 8; slot < nitems(ring->slots); slot++) { + uint64_t v = ring->slots[slot]; + if (verbose >= 2) + dump_slot(slot, v); + + if (v == 0) + return; + + free(lle->name); + memset(lle, 0, sizeof(*lle)); + + cy32 = (v >> LLTRACE_TIMESTAMP_SHIFT) & LLTRACE_TIMESTAMP_MASK; + cy32 <<= TS32_SHIFT; + cydiff = (cy32 - state.cy32); + cydiff >>= TS32_SHIFT; + + if (verbose >= 2) + printf("## state.cy %llu state.cy32 %u diff %d\n", state.cy, state.cy32, cydiff); + + state.cy32 = cy32; + state.cy += cydiff; + lle->cy = state.cy; + lle->ns = state.ct.base_ns + + (lle->cy * state.ct.slope); + + if (verbose >= 2) { + printf("## lle cy %llu\n", lle->cy); + printf("## +%llu.%09llus\n", + lle->ns / 1000000000, lle->ns % 1000000000); + } + + /* decode timestamp here */ + + ev = (v >> LLTRACE_EVENT_SHIFT) & LLTRACE_EVENT_MASK; + + switch (ev & LLTRACE_EVENT_T_MASK) { + case LLTRACE_EVENT_T_VARLEN: + len = ev & LLTRACE_EVENT_VARLEN_MASK; + if (verbose >= 2) + dump_slots(ring, slot + 1, len); + + lltextract_varlen(&state, lle, ev, v, + ring->slots + slot + 1, len); + + slot += len; + break; + case LLTRACE_EVENT_T_MARK: + lltextract_mark(&state, lle, ev, v); + break; + case LLTRACE_EVENT_T_IRQ: + lltextract_irq(&state, lle, ev, v); + break; + case LLTRACE_EVENT_T_SYSCALL: + lltextract_syscall(&state, lle, ev, v); + break; + case LLTRACE_EVENT_T_SYSRET: + lltextract_sysret(&state, lle, ev, v); + break; + default: + errx(1, "unexpected event 0x%3x", ev); + /* NOTREACHED */ + } + + // # TS DUR EVENT CPU PID RPC ARG0 RETVAL IPC NAME (t and dur multiples of 10ns) + fprintf(ofile, "%llu 0 %u %u %u 0 %u 0 0 %s (%03x)\n", + lle->ns/10, lle->event, + //lle->cy, lle->event, + state.cpu, lle->tid, lle->arg0, lle->name, lle->event); + + lle++; + } +} + +static void +lltextract_pc(struct llevent *lle, int event, uint64_t pc) +{ + lle->event = event; + + /* + * XXX The PC sample is generated after the local_timer + * interrupt, but we really want its sample time to be just + * before that interrupt. + */ + + /* + * Put a hash of the PC name into arg, so HTML display can + * choose colors quickly. + */ + lle->arg0 = (pc >> 6) & 0xffff; + + if (event == KUTRACE_PC_K) { + const struct ksym *k; + + k = ksym_nfind(pc); + if (k != NULL) { + if (asprintf(&lle->name, "PC=%s", k->name) == -1) + errx(1, "PC_K name asprintf"); + return; + } + } + + if (asprintf(&lle->name, "PC=%016llx", pc) == -1) + errx(1, "PC asprintf"); +} + +static char * +xstrdup(const char *src) +{ + char *dst; + + dst = strdup(src); + if (dst == NULL) + err(1, "strdup %s", src); + + return (dst); +} + +static void +lltextract_varlen(struct lltstate *state, struct llevent *lle, + unsigned int ev, uint64_t v, const uint64_t *var, unsigned int len) +{ + ev &= ~LLTRACE_EVENT_VARLEN_MASK; + pid_t tid; + int kthread; + + switch (ev) { + case LLTRACE_EVENT_PID: + tid = (v >> LLTRACE_ARG32_SHIFT) & LLTRACE_ARG32_MASK; + kthread = !!(tid & LLTRACE_EVENT_PID_ARG_KTHREAD); + tid &= ~LLTRACE_EVENT_PID_ARG_KTHREAD; + + if (tid == state->idletid) { + /* + * map the per-cpu idle procs in OpenBSD to a single + * idle thread on all cpus at tid 0. + */ + tid = 0; + lle->name = xstrdup("-idle-"); + } else { + /* + * tools further down the chain rely on pids/tids + * being 16bit + */ + tid &= 0xffff; + + if (len > 0) { + fprintf(ofile, "%llu 0 %llu %u %s%.*s\n", + lle->ns/10, KUTRACE_PIDNAME, tid, + //lle->cy, KUTRACE_PIDNAME, tid, + kthread ? 
"kworker/" : "", + (int)(len * sizeof(*var)), (char *)var); + } + + if (asprintf(&lle->name, "%s%.*s.%u", + kthread ? "kworker/" : "", + (int)(len * sizeof(*var)), (char *)var, + tid) == -1) + errx(1, "pid asprintf"); + } + + lle->tid = tid; + lle->event = KUTRACE_USERPID; + + state->tid = tid; + break; + + case LLTRACE_EVENT_PC_U: + lltextract_pc(lle, KUTRACE_PC_U, var[0]); + break; + case LLTRACE_EVENT_PC_K: + lltextract_pc(lle, KUTRACE_PC_K, var[0]); + break; + + default: + errx(1, "unexpected varlen event 0x%03x", ev); + /* NOTREACHED */ + } +} + +static char * +trap_name(unsigned int trap) +{ + const char *source; + char *name; + + switch (trap) { + case LLTRACE_TRAP_PAGEFAULT: + source = "page_fault"; + break; + default: + if (asprintf(&name, "trap-%u", trap) == -1) + errx(1, "trap asprintf"); + return (name); + } + + name = xstrdup(source); + + return (name); +} + +static void +lltextract_trap(struct lltstate *state, struct llevent *lle, + unsigned int event, uint64_t v) +{ + unsigned int trap; + + trap = (v >> LLTRACE_ARG32_SHIFT) & LLTRACE_ARG32_MASK; + + lle->tid = state->tid; + lle->event = event + trap; + lle->name = trap_name(trap); +} + +static void +lltextract_sched(struct lltstate *state, struct llevent *lle, + unsigned int event) +{ + lle->tid = state->tid; + lle->event = event; + lle->arg0 = 0; + lle->name = xstrdup("-sched-"); +} + +static void +lltextract_lock(struct lltstate *state, struct llevent *lle, + unsigned int event, uint64_t v) +{ + const struct ksym *k; + unsigned int addr, lock; + + addr = (v >> LLTRACE_ARG32_SHIFT) & LLTRACE_ARG32_MASK; + lock = addr & 0xffff; + + lle->tid = state->tid; + lle->event = event; + + k = ksym_nfind(addr); + if (k != NULL) { + uint32_t diff = addr - k->addr; + lle->arg0 = addr; + + if (diff == 0) + lle->name = xstrdup(k->name); + else { + if (asprintf(&lle->name, "%s+%u", k->name, diff) == -1) + err(1, "lock %s asprintf", k->name); + } + + fprintf(ofile, "%llu 0 %llu %u %s\n", + lle->ns/10, KUTRACE_LOCKNAME, lle->arg0, lle->name); + } else { + lle->arg0 = lock; + if (asprintf(&lle->name, "lock.%x", lock) == -1) + err(1, "lock asprintf"); + } +} + +static void +lltextract_pkts(struct lltstate *state, struct llevent *lle, uint64_t v) +{ + unsigned int type = v & LLTRACE_PKTS_T_MASK; + const char *name; + + switch (type) { + case LLTRACE_PKTS_T_IFQ: + name = "ifq"; + break; + case LLTRACE_PKTS_T_NETTQ: + name = "process"; + break; + case LLTRACE_PKTS_T_IFIQ: + name = "ifiq"; + break; + case LLTRACE_PKTS_T_QDROP: + name = "qdrop"; + break; + case LLTRACE_PKTS_T_HDROP: + name = "hdrop"; + break; +#ifdef LLTRACE_PKTS_T_BDROP + case LLTRACE_PKTS_T_BDROP: + name = "bdrop"; + break; +#endif + default: + errx(1, "unexpected pkts type %x", + type >> LLTRACE_PKTS_T_SHIFT); + /* NOTREACHED */ + } + + lle->tid = state->tid; + lle->event = KUTRACE_MARKA; /* sure */ + lle->arg0 = v & LLTRACE_PKTS_V_MASK; + lle->name = xstrdup(name); + +#if 0 + if (asprintf(&lle->name, "%s=%llu", name, + v & LLTRACE_PKTS_V_MASK) == -1) + errx(1, "pkts asprintf"); +#endif +} + +static void +lltextract_func(struct lltstate *state, struct llevent *lle, + unsigned int event, const char *evname, uint64_t v) +{ + const struct ksym *k; + + lle->arg0 = (v >> LLTRACE_ARG32_SHIFT) & LLTRACE_ARG32_MASK; + + lle->tid = state->tid; + lle->event = event; + + k = ksym_nfind(lle->arg0); + if (k != NULL) { + uint32_t diff = lle->arg0 - k->addr; + if (diff == 0) { + if (asprintf(&lle->name, "%s=%s", evname, + k->name) == -1) + err(1, "kfunc %s asprintf", evname); + } else { + if 
(asprintf(&lle->name, "%s=%s+%u", evname, + k->name, diff) == -1) + err(1, "kfunc %s asprintf", evname); + } + } else { + if (asprintf(&lle->name, "%s=0x%x", evname, lle->arg0) == -1) + err(1, "kfunc %s asprintf", evname); + } +} + +static void +lltextract_mark(struct lltstate *state, struct llevent *lle, + unsigned int ev, uint64_t v) +{ + + switch (ev) { + case LLTRACE_EVENT_IDLE: + lle->event = KUTRACE_MWAIT; + lle->arg0 = 255; + + lle->name = xstrdup("mwait"); + break; + + case LLTRACE_EVENT_RUNNABLE: + lle->tid = state->tid; + lle->event = KUTRACE_RUNNABLE; + lle->arg0 = (v >> LLTRACE_ARG32_SHIFT) & LLTRACE_ARG32_MASK; + lle->arg0 &= 0xffff; + + if (asprintf(&lle->name, "runnable.%u", lle->arg0) == -1) + err(1, "runnable asprintf"); + break; + + case LLTRACE_EVENT_IPI: + lle->tid = state->tid; + lle->event = KUTRACE_IPI; + lle->arg0 = (v >> LLTRACE_ARG32_SHIFT) & LLTRACE_ARG32_MASK; + + lle->name = xstrdup("sendipi"); + break; + + case LLTRACE_EVENT_SCHED: + lltextract_sched(state, lle, + KUTRACE_SYSCALL(KUTRACE_SYSCALL_SCHED)); + break; + case LLTRACE_EVENT_SCHEDRET: + lltextract_sched(state, lle, + KUTRACE_SYSRET(KUTRACE_SYSCALL_SCHED)); + break; + + case LLTRACE_EVENT_TRAP: + lltextract_trap(state, lle, KUTRACE_TRAP, v); + break; + case LLTRACE_EVENT_TRAPRET: + lltextract_trap(state, lle, KUTRACE_TRAPRET, v); + break; + + case LLTRACE_EVENT_LOCK(LLTRACE_LOCK_NOACQUIRE): + lltextract_lock(state, lle, KUTRACE_LOCKNOACQUIRE, v); + break; + case LLTRACE_EVENT_LOCK(LLTRACE_LOCK_ACQUIRE): + lltextract_lock(state, lle, KUTRACE_LOCKACQUIRE, v); + break; + case LLTRACE_EVENT_LOCK(LLTRACE_LOCK_WAKEUP): + lltextract_lock(state, lle, KUTRACE_LOCKWAKEUP, v); + break; + + case LLTRACE_EVENT_PKTS: + lltextract_pkts(state, lle, v); + break; + + case LLTRACE_EVENT_MARK: + lle->tid = state->tid; + lle->event = KUTRACE_MARKB; + lle->arg0 = 0; + + lle->name = xstrdup("markd=yep"); + break; + + case LLTRACE_EVENT_KFUNC_ENTER: + lltextract_func(state, lle, KUTRACE_MARKD, "enter", v); + break; + + case LLTRACE_EVENT_KFUNC_LEAVE: + lltextract_func(state, lle, KUTRACE_MARKD, "leave", v); + break; + + default: + errx(1, "unexpected mark event 0x%03x", ev); + /* NOTREACHED */ + } +} + +static char * +irq_name(unsigned int type, unsigned int vec) +{ + const char *source; + char *name; + + switch (type) { + case LLTRACE_IRQ_IPI: + source = "ipi"; + break; + case LLTRACE_IRQ_BOTTOM_HALF: + if (vec == 0) + return xstrdup("BH:timer"); + + source = "BH"; + break; + case LLTRACE_IRQ_LOCAL_TIMER: + return xstrdup("local_timer_vector"); + default: + if (asprintf(&name, "irq%u:%u", type, vec) == -1) + errx(1, "irq asprintf"); + return (name); + } + + if (asprintf(&name, "%s:%u", source, vec) == -1) + errx(1, "irq %s asprintf", source); + + return (name); +} + +static void +lltextract_irq(struct lltstate *state, struct llevent *lle, + unsigned int ev, uint64_t v) +{ + unsigned int ret = ev & 0x100; + unsigned int type = ev & 0xff; + unsigned int vec = (v >> LLTRACE_ARG32_SHIFT) & LLTRACE_ARG32_MASK; + + lle->event = (ret ? 
KUTRACE_IRQRET : KUTRACE_IRQ) | type; + lle->arg0 = vec; + + lle->name = irq_name(type, vec); +} + +static char * +syscall_name(unsigned int sc) +{ + extern const char *const syscallnames[]; + char *name; + + if (sc < SYS_MAXSYSCALL) + name = xstrdup(syscallnames[sc]); + else { + if (asprintf(&name, "syscall-%u", sc) == -1) + errx(1, "syscall name asprintf"); + } + + return (name); +} + +static void +lltextract_syscall(struct lltstate *state, struct llevent *lle, + unsigned int ev, uint64_t v) +{ + unsigned int sc = LLTRACE_SYSCALL_MASK(ev); + + lle->tid = state->tid; + lle->event = KUTRACE_SYSCALL(sc); + lle->arg0 = (v >> LLTRACE_ARG0_SHIFT) & LLTRACE_ARG0_MASK; + lle->name = syscall_name(sc); +} + +static void +lltextract_sysret(struct lltstate *state, struct llevent *lle, + unsigned int ev, uint64_t v) +{ + unsigned int sc = LLTRACE_SYSCALL_MASK(ev); + + lle->tid = state->tid; + lle->event = KUTRACE_SYSRET(sc); + lle->arg0 = (v >> LLTRACE_ARG0_SHIFT) & LLTRACE_ARG0_MASK; + lle->name = syscall_name(sc); +} + Index: usr.bin/lltextract/lltextract.h =================================================================== RCS file: usr.bin/lltextract/lltextract.h diff -N usr.bin/lltextract/lltextract.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.bin/lltextract/lltextract.h 6 Jun 2022 06:05:25 -0000 @@ -0,0 +1,29 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2022 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include + +struct ksym { + RBT_ENTRY(ksym) entry; + char *name; + uint32_t addr; + uint32_t len; +}; + +const struct ksym *ksym_find(uint32_t); +const struct ksym *ksym_nfind(uint32_t); Index: usr.bin/lltextract/names.c =================================================================== RCS file: usr.bin/lltextract/names.c diff -N usr.bin/lltextract/names.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.bin/lltextract/names.c 6 Jun 2022 06:05:25 -0000 @@ -0,0 +1,132 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2022 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lltextract.h" + +#define DBNAME "/var/db/kvm_bsd.db" + +HASHINFO openinfo = { + 4096, /* bsize */ + 128, /* ffactor */ + 1024, /* nelem */ + 2048 * 1024, /* cachesize */ + NULL, /* hash() */ + 0 /* lorder */ +}; + +RBT_HEAD(ksyms, ksym); + +RBT_PROTOTYPE(ksyms, ksym, entry, ksym_cmp); + +static struct ksyms _ksyms = RBT_INITIALIZER(ksyms); + +static void +knames_load(struct ksyms *ksyms) +{ + DB *db; + DBT key, data; + struct nlist n; + struct ksym *k; + + db = dbopen(DBNAME, O_RDONLY, 0, DB_HASH, NULL); + if (db == NULL) + err(1, "%s", DBNAME); + + for (;;) { + int rv = db->seq(db, &key, &data, R_NEXT); + if (rv == -1) + errx(1, "%s seq", DBNAME); + + if (rv != 0) + break; + + if (key.size < 2 || *(const char *)key.data != '_') + continue; + if (data.size != sizeof(n)) + continue; + + memcpy(&n, data.data, sizeof(n)); + //if (n.n_type != N_TEXT) + // continue; + + k = malloc(sizeof(*k) + key.size); + if (k == NULL) + err(1, "%s ksym", __func__); + + k->addr = n.n_value; + k->len = 0; + k->name = (char *)(k + 1); + + memcpy(k->name, (const char *)key.data + 1, key.size - 1); + k->name[key.size - 1] = '\0'; + + if (RBT_INSERT(ksyms, ksyms, k) != NULL) + free(k); + } + + db->close(db); +} + +const struct ksym * +ksym_find(uint32_t addr) +{ + struct ksyms *ksyms = &_ksyms; + struct ksym key = { .addr = addr }; + + if (RBT_EMPTY(ksyms, ksyms)) + knames_load(ksyms); + + return (RBT_FIND(ksyms, ksyms, &key)); +} + +const struct ksym * +ksym_nfind(uint32_t addr) +{ + struct ksyms *ksyms = &_ksyms; + struct ksym key = { .addr = addr }; + + if (RBT_EMPTY(ksyms, ksyms)) + knames_load(ksyms); + + return (RBT_NFIND(ksyms, ksyms, &key)); +} + +static inline int +ksym_cmp(const struct ksym *a, const struct ksym *b) +{ + if (a->addr > b->addr) + return (-1); + if (a->addr < b->addr) + return (1); + return (0); +} + +RBT_GENERATE(ksyms, ksym, entry, ksym_cmp); Index: usr.bin/lltextract/syscallnames.c =================================================================== RCS file: usr.bin/lltextract/syscallnames.c diff -N usr.bin/lltextract/syscallnames.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.bin/lltextract/syscallnames.c 6 Jun 2022 06:05:25 -0000 @@ -0,0 +1,26 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2022 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#define ACCOUNTING +#define KTRACE +#define PTRACE +#define SYSVMSG +#define SYSVSEM +#define SYSVSHM + +#include Index: usr.sbin/lltrace/Makefile =================================================================== RCS file: usr.sbin/lltrace/Makefile diff -N usr.sbin/lltrace/Makefile --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.sbin/lltrace/Makefile 6 Jun 2022 06:05:27 -0000 @@ -0,0 +1,13 @@ +# $OpenBSD$ + +PROG= lltrace +SRCS= lltrace.c +MAN= + +LDADD= -levent +DPADD= ${LIBEVENT} + +WARNINGS= Yes +DEBUG= -g + +.include Index: usr.sbin/lltrace/lltrace.c =================================================================== RCS file: usr.sbin/lltrace/lltrace.c diff -N usr.sbin/lltrace/lltrace.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.sbin/lltrace/lltrace.c 6 Jun 2022 06:05:27 -0000 @@ -0,0 +1,690 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2022 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "/sys/sys/lltrace.h" + +#ifndef nitems +#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0])) +#endif + +#define DEV_KUTRACE "/dev/lltrace" + +#define NRINGS_DEFAULT 256 /* 256 * 8192 * 8 is 16MB */ + +struct lltrace; + +struct mode { + const char *name; + void *(*setup)(struct lltrace *, int, char **); + int (*run)(struct lltrace *); +}; + +static void *mode_kill_setup(struct lltrace *, int, char **); +static int mode_kill_run(struct lltrace *); + +static const struct mode mode_kill = { + "kill", mode_kill_setup, mode_kill_run +}; + +static void *mode_wait_setup(struct lltrace *, int, char **); +static int mode_wait_run(struct lltrace *); +static void *mode_exec_setup(struct lltrace *, int, char **); +static int mode_exec_run(struct lltrace *); + +static const struct mode modes[] = { + { "wait", mode_wait_setup, mode_wait_run }, + { "exec", mode_exec_setup, mode_exec_run }, +}; + +static const struct mode * + mode_lookup(const char *); +static const char *outfile_default(const char *); + +__dead static void +usage(void) +{ + extern char *__progname; + + fprintf(stderr, "usage: %s [-v] [-m blen] [-o output] [-p prefix] " + "[command]\n", __progname); + fprintf(stderr, " %s wait seconds\n", __progname); + fprintf(stderr, " %s exec program ...\n", __progname); + + exit(-1); +} + +struct lltrace { + const char *outfile; + int dv; /* /dev/lltrace fd */ + int of; /* outfile fd */ + void *mode; + + struct event dv_ev; /* handle reading from the kernel */ + + unsigned int blen; + size_t nbuffers; + struct lltrace_buffer + *buffers; + size_t buffer_idx; + + uint64_t nsec_first; + uint64_t nsec_last; + uint64_t count_buffers; + uint64_t count_slots; + uint64_t count_drops; +}; + +static void lltrace_start(struct lltrace *); +static void 
lltrace_stop(struct lltrace *); + +static void lltrace_read(int, short, void *); +static void lltrace_flush(struct lltrace *); + +int +main(int argc, char *argv[]) +{ + extern char *__progname; + const char *prefix = __progname; + const struct mode *mode = &mode_kill; + int ch; + const char *errstr; + int verbose = 0; + int prio; + + struct lltrace lltrace = { + .outfile = NULL, + .blen = 0, + .nbuffers = NRINGS_DEFAULT, + + .nsec_first = ~0, + .nsec_last = 0, + .count_buffers = 0, + .count_slots = 0, + .count_drops = 0, + }; + struct lltrace *llt = &lltrace; + unsigned int tmode = LLTRACE_MODE_COUNT; + + while ((ch = getopt(argc, argv, "m:n:o:p:tv")) != -1) { + switch (ch) { + case 'm': + llt->blen = strtonum(optarg, + LLTRACE_BLEN_MIN, LLTRACE_BLEN_MAX, &errstr); + if (errstr != NULL) { + errx(1, "kernel buffer len %s: %s", + optarg, errstr); + } + break; + case 'n': + llt->nbuffers = strtonum(optarg, 4, 4096, &errstr); + if (errstr != NULL) { + errx(1, "number of buffers %s: %s", + optarg, errstr); + } + break; + case 'o': + llt->outfile = optarg; + break; + case 'p': + prefix = optarg; + break; + case 't': + tmode = LLTRACE_MODE_TAIL; + break; + case 'v': + verbose = 1; + break; + default: + usage(); + /* NOTREACHED */ + } + } + + argc -= optind; + argv += optind; + + optreset = optind = opterr = 1; /* kill mode has to be careful */ + + if (argc > 0) { + mode = mode_lookup(argv[0]); + if (mode == NULL) + errx(1, "unknown mode %s", argv[0]); + } + + if (llt->outfile == NULL) + llt->outfile = outfile_default(prefix); + + event_init(); + + llt->mode = (*mode->setup)(llt, argc, argv); + + llt->dv = open(DEV_KUTRACE, O_NONBLOCK|O_RDWR|O_CLOEXEC); + if (llt->dv == -1) + err(1, "%s", DEV_KUTRACE); + + if (llt->blen != 0) { + if (ioctl(llt->dv, LLTIOCSBLEN, &llt->blen) == -1) + err(1, "set kernel buffer len %u", llt->blen); + } + if (tmode != LLTRACE_MODE_COUNT) { + if (ioctl(llt->dv, LLTIOCSMODE, &tmode) == -1) + err(1, "set mode %u", tmode); + } + + event_set(&llt->dv_ev, llt->dv, EV_READ|EV_PERSIST, + lltrace_read, llt); + + llt->of = open(llt->outfile, O_WRONLY|O_CREAT|O_CLOEXEC|O_TRUNC, 0640); + if (llt->of == -1) + err(1, "open %s", llt->outfile); + + llt->buffers = calloc(llt->nbuffers, sizeof(*llt->buffers)); + if (llt->buffers == NULL) + err(1, "unable to allocate %zu buffers", llt->nbuffers); + + llt->buffer_idx = 0; + + if ((*mode->run)(llt) == -1) + exit(1); + + prio = getpriority(PRIO_PROCESS, 0); + if (setpriority(PRIO_PROCESS, 0, -20) == -1) + err(1, "setpriority -20"); + + lltrace_start(llt); + + event_dispatch(); + + if (setpriority(PRIO_PROCESS, 0, prio) == -1) + err(1, "setpriority %d", prio); + + if (llt->buffer_idx != 0) + lltrace_flush(llt); + + if (verbose) { + uint64_t diff = llt->nsec_last - llt->nsec_first; + double interval = (double)diff / 1000000000.0; + int mib[] = { CTL_HW, HW_NCPU }; + int ncpus; + size_t ncpuslen = sizeof(ncpus); + + if (sysctl(mib, nitems(mib), &ncpus, &ncpuslen, NULL, 0) == -1) + err(1, "sysctl hw.ncpus"); + + printf("output file: %s\n", llt->outfile); + printf("interval: %.03lfs, ncpus: %d\n", interval, ncpus); + printf("buffers: %llu (%.01lf/cpu/s), " + "slots: %llu (%.01lf/cpu/s)\n", + llt->count_buffers, llt->count_buffers / interval / ncpus, + llt->count_slots, llt->count_slots / interval / ncpus); + printf("drops: %llu (%.01lf/cpu/s)\n", + llt->count_drops, llt->count_drops / interval / ncpus); + } + + return (0); +} + +static void +lltrace_start(struct lltrace *llt) +{ + event_add(&llt->dv_ev, NULL); + + if (ioctl(llt->dv, LLTIOCSTART) 
== -1) + err(1, "lltrace start"); +} + +static void +lltrace_flush(struct lltrace *llt) +{ + size_t len; + ssize_t rv; + + len = llt->buffer_idx * sizeof(*llt->buffers); + rv = write(llt->of, llt->buffers, len); + if (rv == -1) + err(1, "%s write", llt->outfile); + + if ((size_t)rv < len) { + errx(1, "%s write short (%zd/%zu bytes)", + llt->outfile, rv, len); + } +} + +static int +lltrace_read_one(struct lltrace *llt) +{ + struct lltrace_buffer *buffer; + ssize_t rv; + uint64_t nsec; + + if (llt->buffer_idx >= llt->nbuffers) { + size_t i, j; + + lltrace_flush(llt); + + /* reset */ + llt->buffer_idx = 0; + + /* + * memset(llt->buffers, 0, + * llt->nbuffers * sizeof(*llt->buffers)); + */ + for (i = 0; i < llt->nbuffers; i++) { + buffer = llt->buffers + i; + + for (j = 0; j < nitems(buffer->llt_slots); j++) + buffer->llt_slots[j] = 0; + } + } + + buffer = llt->buffers + llt->buffer_idx; + rv = read(llt->dv, buffer, sizeof(*buffer)); + if (rv == -1) { + switch (errno) { + case EAGAIN: + /* try again later */ + return (EAGAIN); + case ENOENT: + /* we're done */ + event_del(&llt->dv_ev); + return (ENOENT); + default: + err(1, "%s read", DEV_KUTRACE); + /* NOTREACHED */ + } + } + + if (rv == 0) { + /* we're done */ + event_del(&llt->dv_ev); + return (ENOENT); + } + + llt->buffer_idx++; + + nsec = buffer->llt_slots[3]; + if (nsec < llt->nsec_first) + llt->nsec_first = nsec; + + nsec = buffer->llt_slots[5]; + if (nsec > llt->nsec_last) + llt->nsec_last = nsec; + + llt->count_buffers++; + llt->count_slots += rv / sizeof(uint64_t); + //llt->count_drops += buffer->slots[7]; + + return (0); +} + +static void +lltrace_read(int dv, short events, void *arg) +{ + struct lltrace *llt = arg; + + lltrace_read_one(llt); +} + +static void +lltrace_stop(struct lltrace *llt) +{ + int error; + + if (ioctl(llt->dv, LLTIOCSTOP) == -1) { + if (errno != EALREADY) + err(1, "lltrace stop"); + } + + do { + error = lltrace_read_one(llt); + } while (error == 0); + + event_del(&llt->dv_ev); +} + +static const char * +outfile_default(const char *prefix) +{ + char host[MAXHOSTNAMELEN]; + time_t now; + struct tm *tm; + char *outfile; + + if (gethostname(host, sizeof(host)) == -1) + err(1, "gethostname"); + + now = time(NULL); + + tm = localtime(&now); + + if (asprintf(&outfile, "%s_%04d%02d%02d_%02d%02d%02d_%s.lltrace", + prefix, + tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, + tm->tm_hour, tm->tm_min, tm->tm_sec, + host) == -1) + errx(1, "error generating default output filename"); + + return (outfile); +} + +#if 0 +static int +printable(int ch) +{ + if (ch == '\0') + return ('_'); + if (!isprint(ch)) + return ('~'); + + return (ch); +} + +static void +hexdump(const void *d, size_t datalen) +{ + const uint8_t *data = d; + size_t i, j = 0; + + for (i = 0; i < datalen; i += j) { +#if 0 + printf("%04zu: ", i); + for (j = 0; j < 16 && i+j < datalen; j++) + printf("%02x ", data[i + j]); + while (j++ < 16) + printf(" "); +#endif + printf("|"); + + for (j = 0; j < 16 && i+j < datalen; j++) + putchar(printable(data[i + j])); + printf("|\n"); + } +} +#endif + +static const struct mode * +mode_lookup(const char *name) +{ + size_t i; + + for (i = 0; i < nitems(modes); i++) { + const struct mode *mode = &modes[i]; + + if (strcmp(mode->name, name) == 0) + return (mode); + } + + return (NULL); +} + +static void +mode_kill_event(int nil, short events, void *arg) +{ + struct lltrace *llt = arg; + struct event *ev = llt->mode; + + fprintf(stdout, "lltrace stopped\n"); + fflush(stdout); + + event_del(ev); + + lltrace_stop(llt); +} + 
+static void * +mode_kill_setup(struct lltrace *llt, int argc, char *argv[]) +{ + struct event *ev; + + if (argc != 0) + usage(); + + ev = malloc(sizeof(*ev)); + if (ev == NULL) + err(1, NULL); + + signal_set(ev, SIGINT, mode_kill_event, llt); + return (ev); +} + +static int +mode_kill_run(struct lltrace *llt) +{ + struct event *ev = llt->mode; + + signal_add(ev, NULL); + + fprintf(stdout, "lltrace starting, press Ctrl-C to end...\n"); + fflush(stdout); + + return (0); +} + +/* + * lltrace for specified number of seconds. + */ + +struct mode_wait_state { + struct lltrace *llt; + struct timeval tv; + struct event tmo; + struct event sig; +}; + +static void +mode_wait_tmo(int wat, short events, void *arg) +{ + struct mode_wait_state *state = arg; + struct lltrace *llt = state->llt; + + signal_del(&state->sig); + lltrace_stop(llt); +} + +static void +mode_wait_sig(int wat, short events, void *arg) +{ + struct mode_wait_state *state = arg; + struct lltrace *llt = state->llt; + + evtimer_del(&state->tmo); + signal_del(&state->sig); + lltrace_stop(llt); +} + +static void * +mode_wait_setup(struct lltrace *llt, int argc, char *argv[]) +{ + struct mode_wait_state *state; + const char *errstr; + + if (argc != 2) + usage(); + + state = malloc(sizeof(*state)); + if (state == NULL) + err(1, NULL); + + state->llt = llt; + + state->tv.tv_sec = strtonum(argv[1], 1, 600, &errstr); + if (errstr != NULL) + errx(1, "wait time %s: %s", argv[1], errstr); + + state->tv.tv_usec = 0; + + evtimer_set(&state->tmo, mode_wait_tmo, state); + signal_set(&state->sig, SIGINT, mode_wait_sig, state); + + return (state); +} + +static int +mode_wait_run(struct lltrace *llt) +{ + struct mode_wait_state *state = llt->mode; + + evtimer_add(&state->tmo, &state->tv); + signal_add(&state->sig, NULL); + + return (0); +} + +/* + * trace the execution of a (child) program + */ + +struct mode_exec_state { + struct lltrace *llt; + + char **argv; + + pid_t pid; + struct event sigchld; + struct event sigint; + + uid_t uid; + gid_t gid; + gid_t groups[NGROUPS_MAX]; + int ngroups; +}; + +static void +mode_exec_sig(int wat, short events, void *arg) +{ + struct mode_exec_state *state = arg; + struct lltrace *llt = state->llt; + + /* do we check the pid? 
*/ + + signal_del(&state->sigchld); + signal_del(&state->sigint); + lltrace_stop(llt); +} + +static void * +mode_exec_setup(struct lltrace *llt, int argc, char *argv[]) +{ + struct mode_exec_state *state; + const char *user = NULL; + int ch; + + while ((ch = getopt(argc, argv, "u:")) != -1) { + switch (ch) { + case 'u': + user = optarg; + break; + default: + usage(); + /* NOTREACHED */ + } + } + + argc -= optind; + argv += optind; + + if (argc == 0) { + warnx("no command specified"); + usage(); + } + + state = malloc(sizeof(*state)); + if (state == NULL) + err(1, NULL); + + state->llt = llt; + state->argv = argv; + state->uid = 0; + state->pid = -1; /* not yet */ + signal_set(&state->sigchld, SIGCHLD, mode_exec_sig, state); + signal_set(&state->sigint, SIGINT, mode_exec_sig, state); + + if (user != NULL) { + struct passwd *pw; + + pw = getpwnam(user); + if (pw == NULL) + errx(1, "unable to lookup user %s", user); + + state->uid = pw->pw_uid; + state->gid = pw->pw_gid; + + endpwent(); + + state->ngroups = nitems(state->groups); + if (getgrouplist(user, pw->pw_gid, + state->groups, &state->ngroups) == -1) + errx(1, "unable to get groups for user %s", user); + } + + return (state); +} + +static int +mode_exec_run(struct lltrace *llt) +{ + struct mode_exec_state *state = llt->mode; + + signal_add(&state->sigchld, NULL); + signal_add(&state->sigint, NULL); + + state->pid = fork(); + switch (state->pid) { + case -1: + err(1, "unable to fork"); + /* NOTREACHED */ + case 0: /* child */ + break; + default: /* parent */ + return (0); + } + + if (state->uid != 0) { + if (setresgid(state->gid, state->gid, state->gid) == -1) + err(1, "setresgid %d", state->gid); + + if (setgroups(state->ngroups, state->groups) == -1) + err(1, "setgroups"); + + if (setresuid(state->uid, state->uid, state->uid) == -1) + err(1, "setresuid %d", state->uid); + } + + execvp(state->argv[0], state->argv); + + err(1, "exec %s", state->argv[0]); + return (-1); +}
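

For anyone who wants to poke at the new device without building the whole tool, here is a minimal sketch of the userland protocol that lltrace.c above implements: open /dev/lltrace, LLTIOCSTART, trace for a while, LLTIOCSTOP, then read() whole struct lltrace_buffer records until the kernel says there is nothing left. It is not part of the diff; the header path, the non-blocking open flags, and the EALREADY/EAGAIN/ENOENT handling are simply lifted from lltrace.c, and the one-second sleep stands in for a real event loop.

/*
 * Minimal /dev/lltrace consumer sketch (illustration only, not part
 * of the diff).  It follows the same ioctl/read sequence as
 * lltrace.c above; libevent, output files, and stats are left out.
 */

#include <sys/types.h>
#include <sys/ioctl.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#include "/sys/sys/lltrace.h"	/* same header the diff uses */

int
main(void)
{
	struct lltrace_buffer buffer;
	ssize_t rv;
	int dv;

	dv = open("/dev/lltrace", O_NONBLOCK|O_RDWR|O_CLOEXEC);
	if (dv == -1)
		err(1, "/dev/lltrace");

	if (ioctl(dv, LLTIOCSTART) == -1)
		err(1, "lltrace start");

	sleep(1);		/* trace for roughly a second */

	if (ioctl(dv, LLTIOCSTOP) == -1 && errno != EALREADY)
		err(1, "lltrace stop");

	/* drain the buffers the kernel filled while tracing */
	for (;;) {
		rv = read(dv, &buffer, sizeof(buffer));
		if (rv == -1) {
			if (errno == EAGAIN || errno == ENOENT)
				break;	/* nothing left */
			err(1, "read");
		}
		if (rv == 0)
			break;

		printf("buffer: %zd bytes, %zu slots\n",
		    rv, (size_t)rv / sizeof(uint64_t));
	}

	close(dv);
	return (0);
}

The tool proper keeps the descriptor in a libevent read handler instead, so buffers are drained while the trace is still running rather than only after stop.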