Index: sys/arch/amd64/amd64/conf.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/conf.c,v retrieving revision 1.74 diff -u -p -r1.74 conf.c --- sys/arch/amd64/amd64/conf.c 11 Nov 2021 10:03:08 -0000 1.74 +++ sys/arch/amd64/amd64/conf.c 29 Apr 2022 02:18:11 -0000 @@ -136,6 +136,7 @@ cdev_decl(cy); #include "bktr.h" #include "ksyms.h" #include "kstat.h" +#include "kut.h" #include "usb.h" #include "uhid.h" #include "fido.h" @@ -212,7 +213,8 @@ struct cdevsw cdevsw[] = cdev_notdef(), /* 28 was LKM */ cdev_notdef(), /* 29 */ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ - cdev_notdef(), /* 31 */ + cdev_kutrace_init(NKUT,kutrace), + /* 31: kutrace */ cdev_notdef(), /* 32 */ cdev_notdef(), /* 33 */ cdev_notdef(), /* 34 */ Index: sys/arch/amd64/include/cpu.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v retrieving revision 1.141 diff -u -p -r1.141 cpu.h --- sys/arch/amd64/include/cpu.h 31 Aug 2021 17:40:59 -0000 1.141 +++ sys/arch/amd64/include/cpu.h 29 Apr 2022 02:18:11 -0000 @@ -108,7 +108,6 @@ struct cpu_info { struct schedstate_percpu ci_schedstate; /* scheduler state */ struct cpu_info *ci_next; - struct proc *ci_curproc; u_int ci_cpuid; u_int ci_apicid; u_int ci_acpi_proc_id; @@ -119,9 +118,10 @@ struct cpu_info { u_int64_t ci_user_cr3; /* U-K page table */ /* bits for mitigating Micro-architectural Data Sampling */ - char ci_mds_tmp[32]; /* 32byte aligned */ + char ci_mds_tmp[32]; /* 32byte aligned */ void *ci_mds_buf; + struct proc *ci_curproc; struct pmap *ci_proc_pmap; /* last userspace pmap */ struct pcb *ci_curpcb; struct pcb *ci_idle_pcb; Index: sys/conf/GENERIC =================================================================== RCS file: /cvs/src/sys/conf/GENERIC,v retrieving revision 1.284 diff -u -p -r1.284 GENERIC --- sys/conf/GENERIC 19 Apr 2022 01:32:06 -0000 1.284 +++ sys/conf/GENERIC 29 Apr 2022 02:18:11 -0000 @@ -82,6 
+82,7 @@ pseudo-device endrun 1 # EndRun line dis pseudo-device vnd 4 # vnode disk devices pseudo-device ksyms 1 # kernel symbols device pseudo-device kstat # kernel statistics device +pseudo-device kut # kernel/userland tracing device # clonable devices pseudo-device bpfilter # packet filter Index: sys/conf/files =================================================================== RCS file: /cvs/src/sys/conf/files,v retrieving revision 1.714 diff -u -p -r1.714 files --- sys/conf/files 19 Mar 2022 10:25:09 -0000 1.714 +++ sys/conf/files 29 Apr 2022 02:18:11 -0000 @@ -573,6 +573,9 @@ file dev/ksyms.c ksyms needs-flag pseudo-device kstat file dev/kstat.c kstat needs-flag +pseudo-device kut +file dev/kutrace.c kut needs-flag + pseudo-device fuse file miscfs/fuse/fuse_device.c fuse needs-flag file miscfs/fuse/fuse_file.c fuse Index: sys/dev/kutrace.c =================================================================== RCS file: sys/dev/kutrace.c diff -N sys/dev/kutrace.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/dev/kutrace.c 29 Apr 2022 02:18:11 -0000 @@ -0,0 +1,631 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2022 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#if defined(__amd64__) || defined(__i386__) + +static inline unsigned int +kutrace_cas(unsigned int *p, unsigned int e, unsigned int n) +{ + __asm volatile("cmpxchgl %2, %1" + : "=a" (e), "=m" (*p) + : "r" (n), "a" (e), "m" (*p)); + + return (e); +} + +static inline uint64_t +kutrace_ts(void) +{ + unsigned int hi, lo; + + __asm volatile("rdtsc" : "=d" (hi), "=a" (lo)); + + return (lo >> 6); +} + +static inline uint64_t +kutrace_ts_long(void) +{ + return (rdtsc() >> 6); +} + +#else /* not x86 */ + +static unsigned int +kutrace_cas(unsigned int *p, unsigned int e, unsigned int n) +{ + unsigned int o; + int s; + + s = intr_disable(); + o = *p; + if (o == e) + *p = n; + intr_restore(s); + + return (o); +} + +static inline uint64_t +kutrace_ts(void) +{ + return (countertime()); +} + +static inline uint64_t +kutrace_ts_long(void) +{ + return (countertime()); +} + +#endif + +#define KUTRACE_NSLOTS 8192 + +struct kutrace_cpu { + struct cpu_info *kut_ci; + + TAILQ_ENTRY(kutrace_cpu) + kut_entry; + unsigned int kut_posted; + unsigned int kut_drops; + + unsigned int kut_slot; + uint64_t kut_ring[KUTRACE_NSLOTS]; +}; + +TAILQ_HEAD(kutrace_cpus, kutrace_cpu); + +struct kutrace_softc { + unsigned int sc_running; + struct rwlock sc_lock; + + struct mutex sc_mtx; + struct kutrace_cpus sc_cpu_list; + unsigned int sc_reading; + struct selinfo sc_sel; +}; + +static int kutrace_start(struct kutrace_softc *, struct proc *); +static int kutrace_stop(struct kutrace_softc *, struct proc *); +static int kutrace_flush(struct kutrace_softc *, struct proc *); + +static struct kutrace_softc *kutrace_sc; + +int +kutattach(int num) +{ + return (0); +} + +int +kutraceopen(dev_t dev, int flag, int mode, struct proc *p) +{ + struct kutrace_softc *sc; + int error; + + if (minor(dev) != 0) + return (ENXIO); + + error = suser(p); + if (error != 0) + return 
(error); +/* only one instance of the device may be open at a time */ + if (kutrace_sc != NULL) + return (EBUSY); + + sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); + if (sc == NULL) + return (ENOMEM); + + sc->sc_running = 0; + rw_init(&sc->sc_lock, "kutlk"); + + mtx_init(&sc->sc_mtx, IPL_STATCLOCK); + TAILQ_INIT(&sc->sc_cpu_list); + sc->sc_reading = 0; + klist_init_mutex(&sc->sc_sel.si_note, &sc->sc_mtx); + + /* commit: publish sc, or back out if another open got there first */ + if (atomic_cas_ptr(&kutrace_sc, NULL, sc) != NULL) { + free(sc, M_DEVBUF, sizeof(*sc)); + return (EBUSY); + } + + return (0); +} +/* close: stop tracing, drop queued buffers, then free the softc */ +int +kutraceclose(dev_t dev, int flag, int mode, struct proc *p) +{ + struct kutrace_softc *sc = kutrace_sc; +/* NOTE(review): kutrace_stop() may fail early (RW_INTR); that is ignored */ + kutrace_stop(sc, p); + kutrace_flush(sc, p); + + kutrace_sc = NULL; + membar_sync(); + + free(sc, M_DEVBUF, sizeof(*sc)); + + return (0); +} +/* FIONREAD helper: sizeof one ring if a buffer is queued, otherwise 0 */ +static int +kutrace_fionread(struct kutrace_softc *sc) +{ + struct kutrace_cpu *kut; + + mtx_enter(&sc->sc_mtx); + kut = TAILQ_FIRST(&sc->sc_cpu_list); + mtx_leave(&sc->sc_mtx); + + return (kut != NULL ? sizeof(kut->kut_ring) : 0); +} +/* allocate a buffer for ci, fill in the header, record p as first event */ +static struct kutrace_cpu * +kutrace_cpu_create(struct cpu_info *ci, struct proc *p) +{ + struct kutrace_cpu *kut; + + kut = malloc(sizeof(*kut), M_DEVBUF, M_WAITOK); + kut->kut_ci = ci; + kut->kut_posted = 0; + kut->kut_drops = 0; + + /* first 8 slots are a sort of header */ + kut->kut_ring[0] = (uint64_t)cpu_number() << 56; + kut->kut_ring[1] = 0; + kut->kut_ring[2] = kutrace_ts_long(); + kut->kut_ring[3] = nsecuptime(); /* XXX wall clock time? 
*/ + kut->kut_ring[4] = 0; /* ts at read time */ + kut->kut_ring[5] = 0; /* time at read time */ + kut->kut_ring[6] = 0; /* unused */ + kut->kut_ring[7] = 0; /* unused */ +/* records start right after the 8 header slots */ + kut->kut_slot = 8; + + kutrace_proc(kut, p); + /* kutrace_proc() has already called kutrace_pidname(kut, p) */ + + return (kut); +} +/* queue kut for readers, notify kqueue; returns the sleeping reader count */ +static unsigned int +kutrace_cpu_post(struct kutrace_softc *sc, struct kutrace_cpu *kut) +{ + unsigned int reading; + + kut->kut_posted = 1; + + mtx_enter(&sc->sc_mtx); + TAILQ_INSERT_TAIL(&sc->sc_cpu_list, kut, kut_entry); + KNOTE(&sc->sc_sel.si_note, 0); + + reading = sc->sc_reading; + mtx_leave(&sc->sc_mtx); + + return (reading); +} +/* start tracing: install a new buffer on every cpu; EALREADY if running */ +static int +kutrace_start(struct kutrace_softc *sc, struct proc *p) +{ + struct cpu_info *ci; + CPU_INFO_ITERATOR cii; + int error; + + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + return (error); + + if (sc->sc_running) { + error = EALREADY; + goto leave; + } + sc->sc_running = 1; +/* visit each cpu in turn; the peg is dropped again after the loop */ + CPU_INFO_FOREACH(cii, ci) { + sched_peg_curproc(ci); + ci->ci_schedstate.spc_kutrace = kutrace_cpu_create(ci, p); + } + atomic_clearbits_int(&p->p_flag, P_CPUPEG); + +leave: + rw_exit(&sc->sc_lock); + return (error); +} +/* stop tracing: take each cpu's buffer away and queue it for reading */ +static int +kutrace_stop(struct kutrace_softc *sc, struct proc *p) +{ + struct kutrace_cpu *kut; + struct cpu_info *ci; + CPU_INFO_ITERATOR cii; + unsigned long s; + int error; + unsigned int reading = 0; + + printf("kutrace stopping\n"); + + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + return (error); + + if (!sc->sc_running) { + error = EALREADY; + goto leave; + } + sc->sc_running = 0; + + /* visit each cpu to take kut away safely */ + CPU_INFO_FOREACH(cii, ci) { + sched_peg_curproc(ci); + + s = intr_disable(); + kut = ci->ci_schedstate.spc_kutrace; + ci->ci_schedstate.spc_kutrace = NULL; + intr_restore(s); +/* buffers already posted from kutrace_statclock() are queued only once */ + if (!kut->kut_posted) + reading |= kutrace_cpu_post(sc, kut); + } + atomic_clearbits_int(&p->p_flag, P_CPUPEG); + + // if (reading) + wakeup(&sc->sc_cpu_list); + +leave: + rw_exit(&sc->sc_lock); + 
return (error); +} + +static int +kutrace_flush(struct kutrace_softc *sc, struct proc *p) +{ + struct kutrace_cpu *kut, *nkut; + + mtx_enter(&sc->sc_mtx); + kut = TAILQ_FIRST(&sc->sc_cpu_list); + TAILQ_INIT(&sc->sc_cpu_list); + mtx_leave(&sc->sc_mtx); + + while (kut != NULL) { + nkut = TAILQ_NEXT(kut, kut_entry); + + free(kut, M_DEVBUF, sizeof(*kut)); + + kut = nkut; + } + + return (0); +} + +int +kutraceioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) +{ + struct kutrace_softc *sc = kutrace_sc; + int error = 0; + + KERNEL_UNLOCK(); + + switch (cmd) { + case FIONREAD: + *(int *)data = kutrace_fionread(sc); + break; + case FIONBIO: + /* vfs tracks this for us if we let it */ + break; + + case KUTIOCSTART: + error = kutrace_start(sc, p); + break; + case KUTIOCSTOP: + error = kutrace_stop(sc, p); + break; + case KUTIOCFLUSH: + error = kutrace_flush(sc, p); + break; + + default: + error = ENOTTY; + break; + } + + KERNEL_LOCK(); + + return (error); +} + +int +kutraceread(dev_t dev, struct uio *uio, int ioflag) +{ + struct kutrace_softc *sc = kutrace_sc; + struct kutrace_cpu *kut, *nkut = NULL; + struct cpu_info *ci; + unsigned long s; + int error; + + mtx_enter(&sc->sc_mtx); + kut = TAILQ_FIRST(&sc->sc_cpu_list); + if (kut == NULL) { + if (ISSET(ioflag, IO_NDELAY)) { + mtx_leave(&sc->sc_mtx); + return (EWOULDBLOCK); + } + + do { + if (!sc->sc_running) { + mtx_leave(&sc->sc_mtx); + return (ENOENT); + } + + sc->sc_reading++; + error = msleep_nsec(&sc->sc_cpu_list, &sc->sc_mtx, + PRIBIO|PCATCH, "kutrace", INFSLP); + sc->sc_reading--; + if (error != 0) { + mtx_leave(&sc->sc_mtx); + return (error); + } + + kut = TAILQ_FIRST(&sc->sc_cpu_list); + } while (kut == NULL); + } + TAILQ_REMOVE(&sc->sc_cpu_list, kut, kut_entry); + mtx_leave(&sc->sc_mtx); + + ci = kut->kut_ci; + + rw_enter_read(&sc->sc_lock); + + sched_peg_curproc(ci); + kut->kut_ring[0] |= kut->kut_drops; + kut->kut_ring[4] = kutrace_ts_long(); + kut->kut_ring[5] = nsecuptime(); /* XXX wall clock 
time? */ +/* keep tracing on this cpu with a new buffer if still running */ + if (sc->sc_running) + nkut = kutrace_cpu_create(ci, curproc); + + s = intr_disable(); + ci->ci_schedstate.spc_kutrace = nkut; + intr_restore(s); + atomic_clearbits_int(&curproc->p_flag, P_CPUPEG); + + rw_exit_read(&sc->sc_lock); +/* copy out only the slots in use, not the whole ring */ + error = uiomove(kut->kut_ring, + kut->kut_slot * sizeof(kut->kut_ring[0]), uio); + + free(kut, M_DEVBUF, sizeof(*kut)); + + return (error); +} +/* kqueue detach: take the knote off the device's klist */ +static void +kutrace_filt_detach(struct knote *kn) +{ + struct kutrace_softc *sc = kn->kn_hook; + + klist_remove(&sc->sc_sel.si_note, kn); +} +/* kqueue event: active, with kn_data = sizeof a ring, if a buffer is queued */ +static int +kutrace_filt_event(struct knote *kn, long hint) +{ + struct kutrace_softc *sc = kn->kn_hook; + struct kutrace_cpu *kut; + + kut = TAILQ_FIRST(&sc->sc_cpu_list); + kn->kn_data = kut == NULL ? 0 : sizeof(kut->kut_ring); + + return (kn->kn_data > 0); +} +/* kqueue modify: rerun the event filter with sc_mtx held */ +static int +kutrace_filt_modify(struct kevent *kev, struct knote *kn) +{ + struct kutrace_softc *sc = kn->kn_hook; + int active; + + mtx_enter(&sc->sc_mtx); + active = knote_modify_fn(kev, kn, kutrace_filt_event); + mtx_leave(&sc->sc_mtx); + + return (active); +} +/* kqueue process: deliver the event with sc_mtx held */ +static int +kutrace_filt_process(struct knote *kn, struct kevent *kev) +{ + struct kutrace_softc *sc = kn->kn_hook; + int active; + + mtx_enter(&sc->sc_mtx); + active = knote_process_fn(kn, kev, kutrace_filt_event); + mtx_leave(&sc->sc_mtx); + + return (active); +} +/* filter ops installed for EVFILT_READ by kutracekqfilter() */ +static const struct filterops kutrace_filtops = { + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, + .f_attach = NULL, + .f_detach = kutrace_filt_detach, + .f_event = kutrace_filt_event, + .f_modify = kutrace_filt_modify, + .f_process = kutrace_filt_process, +}; +/* kqfilter: only EVFILT_READ is accepted; hooks kn up to the softc */ +int +kutracekqfilter(dev_t dev, struct knote *kn) +{ + struct kutrace_softc *sc = kutrace_sc; + struct klist *klist; + + switch (kn->kn_filter) { + case EVFILT_READ: + klist = &sc->sc_sel.si_note; + kn->kn_fop = &kutrace_filtops; + break; + default: + return (EINVAL); + } + + kn->kn_hook = sc; + klist_insert(klist, kn); + + return (0); +} +/* reserve slots in kut's ring and store record followed by extra */ +static unsigned int +kutrace_insert(struct 
kutrace_cpu *kut, uint64_t record, + const uint64_t *extra, unsigned int n) +{ + unsigned int slot, oslot, nslot; + uint64_t *slots; + + n++; + record |= kutrace_ts() << KUTRACE_TIMESTAMP_SHIFT; + + slot = kut->kut_slot; + for (;;) { + nslot = slot + n; + if (nslot > nitems(kut->kut_ring)) { + atomic_inc_int(&kut->kut_drops); + return (slot); + } + + oslot = kutrace_cas(&kut->kut_slot, slot, nslot); + if (slot == oslot) + break; + + slot = oslot; + } + + slots = kut->kut_ring + slot; + *slots = record; + while (n > 1) { + *(++slots) = *(extra++); + n--; + } + + return (nslot); +} + +void +kutrace_statclock(struct kutrace_cpu *kut, int usermode, unsigned long pc) +{ + uint64_t extra[1] = { pc }; + uint64_t event = KUTRACE_EVENT_PIDNAME | + KUTRACE_EVENT_LEN(nitems(extra)); + unsigned int slot; + + slot = kutrace_insert(kut, event << KUTRACE_EVENT_SHIFT, + extra, nitems(extra)); + + if (!kut->kut_posted && slot > (nitems(kut->kut_ring) / 2)) { + struct kutrace_softc *sc = kutrace_sc; + + if (kutrace_cpu_post(sc, kut)) { + printf("kutrace posting on cpu %u\n", cpu_number()); + wakeup_one(&sc->sc_cpu_list); + } + } +} + +void +kutrace_syscall(struct kutrace_cpu *kut, register_t code, + size_t argsize, const register_t *args) +{ + uint64_t record = KUTRACE_EVENT_SYSCALL(code) << KUTRACE_EVENT_SHIFT; + + if (argsize > 0) { + uint64_t arg0 = args[0] & KUTRACE_ARG0_MASK; + record |= arg0 << KUTRACE_ARG0_SHIFT; + } + + kutrace_insert(kut, record, NULL, 0); +} + +void +kutrace_sysret(struct kutrace_cpu *kut, register_t code, + int error, const register_t retvals[2]) +{ + uint64_t record = KUTRACE_EVENT_SYSRET(code) << KUTRACE_EVENT_SHIFT; + uint64_t arg0 = error & KUTRACE_ARG0_MASK; + record |= arg0 << KUTRACE_ARG0_SHIFT; + + kutrace_insert(kut, record, NULL, 0); +} + +void +kutrace_proc(struct kutrace_cpu *kut, struct proc *p) +{ + uint64_t record = KUTRACE_EVENT_USERPID << KUTRACE_EVENT_SHIFT; + record |= (p->p_tid & KUTRACE_ARG0_MASK) << KUTRACE_ARG0_SHIFT; + + 
kutrace_insert(kut, record, NULL, 0); + kutrace_pidname(kut, p); +} +/* log p's command name (ps_comm) as a PIDNAME record */ +void +kutrace_pidname(struct kutrace_cpu *kut, struct proc *p) +{ + uint64_t record; + uint64_t extra[3]; + unsigned int l, n; + + CTASSERT(sizeof(extra) == sizeof(p->p_p->ps_comm)); + + extra[0] = extra[1] = extra[2] = 0; /* memset */ + l = strlcpy((char *)extra, p->p_p->ps_comm, sizeof(extra)); + + /* turn the string length into the number of slots we need */ + n = howmany(l, sizeof(uint64_t)); + + record = (KUTRACE_EVENT_PIDNAME | KUTRACE_EVENT_LEN(n)) << + KUTRACE_EVENT_SHIFT; + + kutrace_insert(kut, record, extra, n); +} +/* log a RUNNABLE event carrying p's p_tid */ +void +kutrace_runnable(struct kutrace_cpu *kut, struct proc *p) +{ + uint64_t record = KUTRACE_EVENT_RUNNABLE << KUTRACE_EVENT_SHIFT; + record |= (p->p_tid & KUTRACE_ARG0_MASK) << KUTRACE_ARG0_SHIFT; + + kutrace_insert(kut, record, NULL, 0); +} Index: sys/kern/kern_clock.c =================================================================== RCS file: /cvs/src/sys/kern/kern_clock.c,v retrieving revision 1.103 diff -u -p -r1.103 kern_clock.c --- sys/kern/kern_clock.c 16 Feb 2022 08:01:32 -0000 1.103 +++ sys/kern/kern_clock.c 29 Apr 2022 02:18:11 -0000 @@ -60,6 +60,11 @@ #include #endif +#include "kut.h" +#if NKUT > 0 +#include +#endif + /* * Clock handling routines. * @@ -327,6 +332,17 @@ statclock(struct clockframe *frame) setstatclockrate(profhz); } } + +#if NKUT > 0 + { + struct kutrace_cpu *kut = kutrace_enter_cpu(ci); + if (kut != NULL) { + kutrace_statclock(kut, + CLKF_USERMODE(frame), CLKF_PC(frame)); + } + kutrace_leave(kut); + } +#endif if (CLKF_USERMODE(frame)) { pr = p->p_p; Index: sys/kern/kern_exec.c =================================================================== RCS file: /cvs/src/sys/kern/kern_exec.c,v retrieving revision 1.230 diff -u -p -r1.230 kern_exec.c --- sys/kern/kern_exec.c 22 Feb 2022 17:14:14 -0000 1.230 +++ sys/kern/kern_exec.c 29 Apr 2022 02:18:11 -0000 @@ -33,6 +33,8 @@ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#include "kut.h" + #include #include #include @@ -67,6 +69,10 @@ #include +#if NKUT > 0 +#include +#endif + struct uvm_object *sigobject; /* shared sigcode object */ struct uvm_object *timekeep_object; struct timekeep *timekeep; @@ -510,6 +516,17 @@ sys_execve(struct proc *p, void *v, regi memset(pr->ps_comm, 0, sizeof(pr->ps_comm)); strlcpy(pr->ps_comm, nid.ni_cnd.cn_nameptr, sizeof(pr->ps_comm)); pr->ps_acflag &= ~AFORK; + +#if NKUT > 0 + { + struct kutrace_cpu *kut; + + kut = kutrace_enter(); + if (kut != NULL) + kutrace_pidname(kut, p); + kutrace_leave(kut); + } +#endif /* record proc's vnode, for use by sysctl */ otvp = pr->ps_textvp; Index: sys/kern/kern_synch.c =================================================================== RCS file: /cvs/src/sys/kern/kern_synch.c,v retrieving revision 1.185 diff -u -p -r1.185 kern_synch.c --- sys/kern/kern_synch.c 18 Mar 2022 15:32:06 -0000 1.185 +++ sys/kern/kern_synch.c 29 Apr 2022 02:18:11 -0000 @@ -37,6 +37,8 @@ * @(#)kern_synch.c 8.6 (Berkeley) 1/21/94 */ +#include "kut.h" + #include #include #include @@ -65,6 +67,10 @@ #include #endif +#if NKUT > 0 +#include +#endif + int sleep_signal_check(void); int thrsleep(struct proc *, struct sys___thrsleep_args *); int thrsleep_unlock(void *); @@ -544,6 +550,16 @@ unsleep(struct proc *p) p->p_wchan = NULL; TRACEPOINT(sched, wakeup, p->p_tid + THREAD_PID_OFFSET, p->p_p->ps_pid); +#if NKUT > 0 + { + struct kutrace_cpu *kut; + + kut = kutrace_enter(); + if (kut != NULL) + kutrace_runnable(kut, p); + kutrace_leave(kut); + } +#endif } } Index: sys/kern/kern_tc.c =================================================================== RCS file: /cvs/src/sys/kern/kern_tc.c,v retrieving revision 1.75 diff -u -p -r1.75 kern_tc.c --- sys/kern/kern_tc.c 24 Oct 2021 00:02:25 -0000 1.75 +++ sys/kern/kern_tc.c 29 Apr 2022 02:18:11 -0000 @@ -140,6 +140,14 @@ tc_delta(struct timehands *th) tc->tc_counter_mask); } +unsigned int +countertime(void) +{ + struct timecounter *tc = 
timehands->th_counter; + + return (tc->tc_get_timecount(tc)); +} + /* * Functions for reading the time. We have to loop until we are sure that * the timehands that we operated on was not updated under our feet. See Index: sys/kern/sched_bsd.c =================================================================== RCS file: /cvs/src/sys/kern/sched_bsd.c,v retrieving revision 1.70 diff -u -p -r1.70 sched_bsd.c --- sys/kern/sched_bsd.c 30 Oct 2021 23:24:48 -0000 1.70 +++ sys/kern/sched_bsd.c 29 Apr 2022 02:18:11 -0000 @@ -37,6 +37,8 @@ * @(#)kern_synch.c 8.6 (Berkeley) 1/21/94 */ +#include "kut.h" + #include #include #include @@ -54,6 +56,9 @@ #include #endif +#if NKUT > 0 +#include +#endif int lbolt; /* once a second sleep address */ int rrticks_init; /* # of hardclock ticks per roundrobin() */ @@ -370,6 +375,18 @@ mi_switch(void) if (p != nextproc) { uvmexp.swtch++; + +#if NKUT > 0 + { + struct kutrace_cpu * kut; + + kut = SMR_PTR_GET(&spc->spc_kutrace); + if (kut != NULL) + kutrace_proc(kut, nextproc); + kutrace_leave(kut); + } +#endif + TRACEPOINT(sched, off__cpu, nextproc->p_tid + THREAD_PID_OFFSET, nextproc->p_p->ps_pid); cpu_switchto(p, nextproc); Index: sys/sys/conf.h =================================================================== RCS file: /cvs/src/sys/sys/conf.h,v retrieving revision 1.156 diff -u -p -r1.156 conf.h --- sys/sys/conf.h 23 Jan 2021 05:08:36 -0000 1.156 +++ sys/sys/conf.h 29 Apr 2022 02:18:11 -0000 @@ -335,6 +335,13 @@ extern struct cdevsw cdevsw[]; (dev_type_stop((*))) enodev, 0, selfalse, \ (dev_type_mmap((*))) enodev } +/* open, close, read, ioctl, poll, kqfilter */ +#define cdev_kutrace_init(c,n) { \ + dev_init(c,n,open), dev_init(c,n,close), dev_init(c,n,read), \ + (dev_type_write((*))) enodev, dev_init(c,n,ioctl), \ + (dev_type_stop((*))) enodev, 0, selfalse, \ + (dev_type_mmap((*))) enodev, 0, 0, dev_init(c,n,kqfilter) } + /* open, close, read, write, ioctl, stop, tty, poll, mmap, kqfilter */ #define cdev_wsdisplay_init(c,n) { \ 
dev_init(c,n,open), dev_init(c,n,close), dev_init(c,n,read), \ @@ -620,6 +627,7 @@ cdev_decl(wsmux); cdev_decl(ksyms); cdev_decl(kstat); +cdev_decl(kutrace); cdev_decl(bio); cdev_decl(vscsi); Index: sys/sys/kutrace.h =================================================================== RCS file: sys/sys/kutrace.h diff -N sys/sys/kutrace.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/sys/kutrace.h 29 Apr 2022 02:18:11 -0000 @@ -0,0 +1,110 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2022 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef _SYS_KUTRACE_H_ +#define _SYS_KUTRACE_H_ +/* ioctls on the kutrace device: start, stop, and discard queued buffers */ +#define KUTIOCSTART _IO('t',128) +#define KUTIOCSTOP _IO('t',129) +#define KUTIOCFLUSH _IO('t',130) +/* record layout: timestamp 63..44, event 43..32, retval 23..16, arg0 15..0 */ +#define KUTRACE_ARG0_SHIFT 0 +#define KUTRACE_ARG0_BITS 16 +#define KUTRACE_ARG0_MASK ((1ULL << KUTRACE_ARG0_BITS) - 1) + +#define KUTRACE_RETVAL_SHIFT 16 +#define KUTRACE_RETVAL_BITS 8 +#define KUTRACE_RETVAL_MASK ((1ULL << KUTRACE_RETVAL_BITS) - 1) + +#define KUTRACE_EVENT_SHIFT 32 +#define KUTRACE_EVENT_BITS 12 +#define KUTRACE_EVENT_MASK ((1ULL << KUTRACE_EVENT_BITS) - 1) + +#define KUTRACE_TIMESTAMP_SHIFT 44 +#define KUTRACE_TIMESTAMP_BITS 20 +#define KUTRACE_TIMESTAMP_MASK ((1ULL << KUTRACE_TIMESTAMP_BITS) - 1) + +/* + * kutrace event types + */ +#define KUTRACE_EVENT_LEN(_n) ((uint64_t)(_n) << 4) + +#define KUTRACE_EVENT_PIDNAME (0x002ULL) +#define KUTRACE_EVENT_PAGEFAULT (0x00eULL) + +#define KUTRACE_EVENT_USERPID (0x200ULL) +#define KUTRACE_EVENT_RUNNABLE (0x206ULL) +#define KUTRACE_EVENT_IPI (0x207ULL) +#define KUTRACE_EVENT_MWAIT (0x208ULL) +#define KUTRACE_EVENT_PSTATE (0x209ULL) + +#define KUTRACE_EVENT_PC (0x280ULL) + +/* these are in blocks of 256: the low 8 bits carry the trap/irq code _c */ +#define KUTRACE_EVENT_TRAP(_c) (0x400ULL | ((uint64_t)(_c) & 0xff)) +#define KUTRACE_EVENT_IRQ(_c) (0x500ULL | ((uint64_t)(_c) & 0xff)) +#define KUTRACE_EVENT_TRAPRET(_c) (0x600ULL | ((uint64_t)(_c) & 0xff)) +#define KUTRACE_EVENT_IRQRET(_c) (0x700ULL | ((uint64_t)(_c) & 0xff)) + +/* these are in blocks of 512 */ +#define KUTRACE_SYSCALL_MASK(_code) ((uint64_t)(_code) & 0x1ff) + +#define KUTRACE_EVENT_SYSCALL(_code) (0x800ULL | KUTRACE_SYSCALL_MASK(_code)) +#define KUTRACE_EVENT_SYSRET(_code) (0xa00ULL | KUTRACE_SYSCALL_MASK(_code)) + +#ifdef _KERNEL + +struct kutrace_cpu; +/* the enter helpers return a cpu's trace buffer, or NULL if it has none */ +static inline struct kutrace_cpu * +kutrace_enter_spc(struct schedstate_percpu *spc) +{ + return (spc->spc_kutrace); +} + +static inline struct kutrace_cpu * +kutrace_enter_cpu(struct cpu_info *ci) +{ + return kutrace_enter_spc(&ci->ci_schedstate); +} + +static inline struct kutrace_cpu * +kutrace_enter(void) +{ + return kutrace_enter_cpu(curcpu()); +} + +static inline void 
+kutrace_leave(struct kutrace_cpu *kut) +{ + /* nop */ +} + +void kutrace_statclock(struct kutrace_cpu *, int, unsigned long); + +void kutrace_syscall(struct kutrace_cpu *, register_t, + size_t, const register_t *); +void kutrace_sysret(struct kutrace_cpu *, register_t, + int, const register_t [2]); +void kutrace_proc(struct kutrace_cpu *, struct proc *); +void kutrace_pidname(struct kutrace_cpu *, struct proc *); +void kutrace_runnable(struct kutrace_cpu *, struct proc *); + +#endif /* _KERNEL */ + +#endif /* _SYS_KUTRACE_H_ */ Index: sys/sys/sched.h =================================================================== RCS file: /cvs/src/sys/sys/sched.h,v retrieving revision 1.57 diff -u -p -r1.57 sched.h --- sys/sys/sched.h 25 Dec 2020 12:49:31 -0000 1.57 +++ sys/sys/sched.h 29 Apr 2022 02:18:11 -0000 @@ -91,11 +91,13 @@ #define SCHED_NQS 32 /* 32 run queues. */ struct smr_entry; +struct kutrace_cpu; /* * Per-CPU scheduler state. */ struct schedstate_percpu { + struct kutrace_cpu *spc_kutrace; struct proc *spc_idleproc; /* idle proc for this cpu */ TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS]; LIST_HEAD(,proc) spc_deadproc; Index: sys/sys/syscall_mi.h =================================================================== RCS file: /cvs/src/sys/sys/syscall_mi.h,v retrieving revision 1.25 diff -u -p -r1.25 syscall_mi.h --- sys/sys/syscall_mi.h 21 Jan 2020 16:16:23 -0000 1.25 +++ sys/sys/syscall_mi.h 29 Apr 2022 02:18:11 -0000 @@ -45,6 +45,10 @@ #include #endif +#include "kut.h" +#if NKUT > 0 +#include +#endif /* * The MD setup for a system call has been done; here's the MI part. 
@@ -76,6 +80,14 @@ mi_syscall(struct proc *p, register_t co KERNEL_UNLOCK(); } #endif +#if NKUT > 0 + { + struct kutrace_cpu *kut = kutrace_enter_cpu(p->p_cpu); + if (kut != NULL) + kutrace_syscall(kut, code, callp->sy_argsize, argp); + kutrace_leave(kut); + } +#endif /* SP must be within MAP_STACK space */ if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p), @@ -113,6 +125,14 @@ static inline void mi_syscall_return(struct proc *p, register_t code, int error, const register_t retval[2]) { +#if NKUT > 0 + { + struct kutrace_cpu *kut = kutrace_enter_cpu(p->p_cpu); + if (kut != NULL) + kutrace_sysret(kut, code, error, retval); + kutrace_leave(kut); + } +#endif #ifdef SYSCALL_DEBUG KERNEL_LOCK(); scdebug_ret(p, code, error, retval); @@ -140,10 +160,19 @@ mi_syscall_return(struct proc *p, regist static inline void mi_child_return(struct proc *p) { -#if defined(SYSCALL_DEBUG) || defined(KTRACE) || NDT > 0 +#if defined(SYSCALL_DEBUG) || defined(KTRACE) || NDT > 0 || NKUT > 0 int code = (p->p_flag & P_THREAD) ? SYS___tfork : (p->p_p->ps_flags & PS_PPWAIT) ? 
SYS_vfork : SYS_fork; const register_t child_retval[2] = { 0, 1 }; +#endif + +#if NKUT > 0 + { + struct kutrace_cpu *kut = kutrace_enter_cpu(p->p_cpu); + if (kut != NULL) + kutrace_sysret(kut, code, 0, child_retval); + kutrace_leave(kut); + } #endif TRACEPOINT(sched, on__cpu, NULL); Index: sys/sys/time.h =================================================================== RCS file: /cvs/src/sys/sys/time.h,v retrieving revision 1.61 diff -u -p -r1.61 time.h --- sys/sys/time.h 19 Jun 2021 13:49:39 -0000 1.61 +++ sys/sys/time.h 29 Apr 2022 02:18:11 -0000 @@ -313,6 +313,8 @@ time_t getuptime(void); uint64_t nsecuptime(void); uint64_t getnsecuptime(void); +unsigned int countertime(void); + struct proc; int clock_gettime(struct proc *, clockid_t, struct timespec *); Index: usr.sbin/kutrace/Makefile =================================================================== RCS file: usr.sbin/kutrace/Makefile diff -N usr.sbin/kutrace/Makefile --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.sbin/kutrace/Makefile 29 Apr 2022 02:18:11 -0000 @@ -0,0 +1,13 @@ +# $OpenBSD$ + +PROG= kutrace +SRCS= kutrace.c +MAN= + +LDADD= -levent +DPADD= ${LIBEVENT} + +WARNINGS= Yes +DEBUG= -g + +.include Index: usr.sbin/kutrace/kutrace.c =================================================================== RCS file: usr.sbin/kutrace/kutrace.c diff -N usr.sbin/kutrace/kutrace.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.sbin/kutrace/kutrace.c 29 Apr 2022 02:18:11 -0000 @@ -0,0 +1,641 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2022 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifndef nitems +#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0])) +#endif + +#define DEV_KUTRACE "/dev/kutrace" +/* buffer for one read(2) from the device */ +struct ring { + uint64_t slots[8192]; +}; + +#define NRINGS_DEFAULT 256 /* 256 * 8192 * 8 is 16MB */ + +struct kutrace; +/* a command mode: its name plus the setup and run callbacks */ +struct mode { + const char *name; + void *(*setup)(struct kutrace *, int, char **); + int (*run)(struct kutrace *); +}; + +static void *mode_kill_setup(struct kutrace *, int, char **); +static int mode_kill_run(struct kutrace *); +/* mode used by main() when no command is given */ +static const struct mode mode_kill = { + "kill", mode_kill_setup, mode_kill_run +}; + +static void *mode_wait_setup(struct kutrace *, int, char **); +static int mode_wait_run(struct kutrace *); +static void *mode_exec_setup(struct kutrace *, int, char **); +static int mode_exec_run(struct kutrace *); + +static const struct mode modes[] = { + { "wait", mode_wait_setup, mode_wait_run }, + { "exec", mode_exec_setup, mode_exec_run }, +}; + +static const struct mode * + mode_lookup(const char *); +static const char *outfile_default(void); +/* print the synopsis, including -n which main() parses, and exit 1 */ +__dead static void +usage(void) +{ + extern char *__progname; + + fprintf(stderr, "usage: %s [-v] [-n nrings] [-o output] [command]\n", __progname); + fprintf(stderr, " %s wait seconds\n", __progname); + fprintf(stderr, " %s exec program ...\n", __progname); + + exit(1); +} + +struct kutrace { + const char *outfile; + int dv; /* /dev/kutrace fd */ + int of; /* outfile fd */ + void *mode; + + struct event dv_ev; /* handle reading from the kernel */ + + size_t nrings; + struct ring *rings; + size_t 
ring_idx; + + uint64_t nsec_first; + uint64_t nsec_last; + uint64_t count_rings; + uint64_t count_slots; +}; + +static void kutrace_start(struct kutrace *); +static void kutrace_stop(struct kutrace *); + +static void kutrace_read(int, short, void *); +static void kutrace_flush(struct kutrace *); + +int +main(int argc, char *argv[]) +{ + const struct mode *mode = &mode_kill; + int ch; + const char *errstr; + int verbose = 0; + + struct kutrace kutrace = { + .outfile = NULL, + .nrings = NRINGS_DEFAULT, + + .nsec_first = ~0, + .nsec_last = 0, + .count_rings = 0, + .count_slots = 0, + }; + struct kutrace *kut = &kutrace; + + while ((ch = getopt(argc, argv, "n:o:v")) != -1) { + switch (ch) { + case 'n': + kut->nrings = strtonum(optarg, 4, 4096, &errstr); + if (errstr != NULL) { + errx(1, "number of rings %s: %s", + optarg, errstr); + } + break; + case 'o': + kut->outfile = optarg; + break; + case 'v': + verbose = 1; + break; + default: + usage(); + /* NOTREACHED */ + } + } + + argc -= optind; + argv += optind; + + optreset = optind = opterr = 1; /* kill mode has to be careful */ + + if (argc > 0) { + mode = mode_lookup(argv[0]); + if (mode == NULL) + errx(1, "unknown mode %s", argv[0]); + } + + if (kut->outfile == NULL) + kut->outfile = outfile_default(); + + event_init(); + + kut->mode = (*mode->setup)(kut, argc, argv); + + kut->dv = open(DEV_KUTRACE, O_NONBLOCK|O_RDWR|O_CLOEXEC); + if (kut->dv == -1) + err(1, "%s", DEV_KUTRACE); + + event_set(&kut->dv_ev, kut->dv, EV_READ|EV_PERSIST, + kutrace_read, kut); + + kut->of = open(kut->outfile, O_WRONLY|O_CREAT|O_CLOEXEC, 0640); + if (kut->of == -1) + err(1, "open %s", kut->outfile); + + kut->rings = calloc(kut->nrings, sizeof(*kut->rings)); + if (kut->rings == NULL) + err(1, "unable to allocate %zu rings", kut->nrings); + + kut->ring_idx = 0; + + if ((*mode->run)(kut) == -1) + exit(1); + + kutrace_start(kut); + + event_dispatch(); + + if (kut->ring_idx != 0) + kutrace_flush(kut); + + if (verbose) { + uint64_t diff = 
kut->nsec_last - kut->nsec_first; + double interval = (double)diff / 1000000000.0; + int mib[] = { CTL_HW, HW_NCPU }; + int ncpus; + size_t ncpuslen = sizeof(ncpus); + + if (sysctl(mib, nitems(mib), &ncpus, &ncpuslen, NULL, 0) == -1) + err(1, "sysctl hw.ncpus"); + + printf("output file: %s\n", kut->outfile); + printf("interval: %.03lfs, ncpus: %d\n", interval, ncpus); + printf("rings: %llu (%.01lf/cpu/s), " + "slots: %llu (%.01lf/cpu/s)\n", + kut->count_rings, kut->count_rings / interval / ncpus, + kut->count_slots, kut->count_slots / interval / ncpus); + } + + return (0); +} + +static void +kutrace_start(struct kutrace *kut) +{ + event_add(&kut->dv_ev, NULL); + + if (ioctl(kut->dv, _IO('t', 128)) == -1) + err(1, "kutrace start"); +} + +static void +kutrace_flush(struct kutrace *kut) +{ + size_t len; + ssize_t rv; + + len = kut->ring_idx * sizeof(*kut->rings); + rv = write(kut->of, kut->rings, len); + if (rv == -1) + err(1, "%s write", kut->outfile); + + if ((size_t)rv < len) { + errx(1, "%s write short (%zd/%zu bytes)", + kut->outfile, rv, len); + } +} + +static int +kutrace_read_one(struct kutrace *kut) +{ + struct ring *ring; + ssize_t rv; + uint64_t nsec; + + if (kut->ring_idx >= kut->nrings) { + size_t i, j; + + kutrace_flush(kut); + + /* reset */ + kut->ring_idx = 0; + + /* + * memset(kut->rings, 0, + * kut->nrings * sizeof(*kut->rings)); + */ + for (i = 0; i < kut->nrings; i++) { + ring = kut->rings + i; + + for (j = 0; j < nitems(ring->slots); j++) + ring->slots[j] = 0; + } + } + + ring = kut->rings + kut->ring_idx; + rv = read(kut->dv, ring, sizeof(*ring)); + if (rv == -1) { + switch (errno) { + case EAGAIN: + /* try again later */ + return (EAGAIN); + case ENOENT: + /* we're done */ + event_del(&kut->dv_ev); + return (ENOENT); + default: + err(1, "%s read", DEV_KUTRACE); + /* NOTREACHED */ + } + } + + kut->ring_idx++; + + nsec = ring->slots[3]; + if (nsec < kut->nsec_first) + kut->nsec_first = nsec; + + nsec = ring->slots[5]; + if (nsec > kut->nsec_last) 
+ kut->nsec_last = nsec; + + kut->count_rings++; + kut->count_slots += rv / sizeof(uint64_t); + + return (0); +} + +static void +kutrace_read(int dv, short events, void *arg) +{ + struct kutrace *kut = arg; + + kutrace_read_one(kut); +} + +static void +kutrace_stop(struct kutrace *kut) +{ + int error; + + if (ioctl(kut->dv, _IO('t', 129)) == -1) + err(1, "kutrace stop"); + + do { + error = kutrace_read_one(kut); + } while (error == 0); + + event_del(&kut->dv_ev); +} + +static const char * +outfile_default(void) +{ + extern char *__progname; + char host[MAXHOSTNAMELEN]; + time_t now; + struct tm *tm; + char *outfile; + + if (gethostname(host, sizeof(host)) == -1) + err(1, "gethostname"); + + now = time(NULL); + + tm = localtime(&now); + + if (asprintf(&outfile, "%s_%04d%02d%02d_%02d%02d%02d_%s.trace", + __progname, + tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, + tm->tm_hour, tm->tm_min, tm->tm_sec, + host) == -1) + errx(1, "error generating default output filename"); + + return (outfile); +} + +#if 0 +static int +printable(int ch) +{ + if (ch == '\0') + return ('_'); + if (!isprint(ch)) + return ('~'); + + return (ch); +} + +static void +hexdump(const void *d, size_t datalen) +{ + const uint8_t *data = d; + size_t i, j = 0; + + for (i = 0; i < datalen; i += j) { +#if 0 + printf("%04zu: ", i); + for (j = 0; j < 16 && i+j < datalen; j++) + printf("%02x ", data[i + j]); + while (j++ < 16) + printf(" "); +#endif + printf("|"); + + for (j = 0; j < 16 && i+j < datalen; j++) + putchar(printable(data[i + j])); + printf("|\n"); + } +} +#endif + +static const struct mode * +mode_lookup(const char *name) +{ + size_t i; + + for (i = 0; i < nitems(modes); i++) { + const struct mode *mode = &modes[i]; + + if (strcmp(mode->name, name) == 0) + return (mode); + } + + return (NULL); +} + +static void +mode_kill_event(int nil, short events, void *arg) +{ + struct kutrace *kut = arg; + struct event *ev = kut->mode; + + fprintf(stdout, "kutrace stopped\n"); + fflush(stdout); + + 
event_del(ev); + + kutrace_stop(kut); +} + +static void * +mode_kill_setup(struct kutrace *kut, int argc, char *argv[]) +{ + struct event *ev; + + if (argc != 0) + usage(); + + ev = malloc(sizeof(*ev)); + if (ev == NULL) + err(1, NULL); + + signal_set(ev, SIGINT, mode_kill_event, kut); + return (ev); +} + +static int +mode_kill_run(struct kutrace *kut) +{ + struct event *ev = kut->mode; + + signal_add(ev, NULL); + + fprintf(stdout, "kutrace starting, press Ctrl-C to end...\n"); + fflush(stdout); + + return (0); +} + +/* + * kutrace for specified number of seconds. + */ + +struct mode_wait_state { + struct kutrace *kut; + struct timeval tv; + struct event tmo; + struct event sig; +}; + +static void +mode_wait_tmo(int wat, short events, void *arg) +{ + struct mode_wait_state *state = arg; + struct kutrace *kut = state->kut; + + signal_del(&state->sig); + kutrace_stop(kut); +} + +static void +mode_wait_sig(int wat, short events, void *arg) +{ + struct mode_wait_state *state = arg; + struct kutrace *kut = state->kut; + + evtimer_del(&state->tmo); + signal_del(&state->sig); + kutrace_stop(kut); +} + +static void * +mode_wait_setup(struct kutrace *kut, int argc, char *argv[]) +{ + struct mode_wait_state *state; + const char *errstr; + + if (argc != 2) + usage(); + + state = malloc(sizeof(*state)); + if (state == NULL) + err(1, NULL); + + state->kut = kut; + + state->tv.tv_sec = strtonum(argv[1], 1, 600, &errstr); + if (errstr != NULL) + errx(1, "wait time %s: %s", argv[1], errstr); + + state->tv.tv_usec = 0; + + evtimer_set(&state->tmo, mode_wait_tmo, state); + signal_set(&state->sig, SIGINT, mode_wait_sig, state); + + return (state); +} + +static int +mode_wait_run(struct kutrace *kut) +{ + struct mode_wait_state *state = kut->mode; + + evtimer_add(&state->tmo, &state->tv); + signal_add(&state->sig, NULL); + + return (0); +} + +/* + * trace the execution of a (child) program + */ + +struct mode_exec_state { + struct kutrace *kut; + + char **argv; + + pid_t pid; + struct 
event sigchld; + struct event sigint; + + uid_t uid; + gid_t gid; + gid_t groups[NGROUPS_MAX]; + int ngroups; +}; + +static void +mode_exec_sig(int wat, short events, void *arg) +{ + struct mode_exec_state *state = arg; + struct kutrace *kut = state->kut; + + /* do we check the pid? */ + + signal_del(&state->sigchld); + signal_del(&state->sigint); + kutrace_stop(kut); +} + +static void * +mode_exec_setup(struct kutrace *kut, int argc, char *argv[]) +{ + struct mode_exec_state *state; + const char *user = NULL; + int ch; + + while ((ch = getopt(argc, argv, "u:")) != -1) { + switch (ch) { + case 'u': + user = optarg; + break; + default: + usage(); + /* NOTREACHED */ + } + } + + argc -= optind; + argv += optind; + + if (argc == 0) { + warnx("no command specified"); + usage(); + } + + state = malloc(sizeof(*state)); + if (state == NULL) + err(1, NULL); + + state->kut = kut; + state->argv = argv; + state->uid = 0; + state->pid = -1; /* not yet */ + signal_set(&state->sigchld, SIGCHLD, mode_exec_sig, state); + signal_set(&state->sigint, SIGINT, mode_exec_sig, state); + + if (user != NULL) { + struct passwd *pw; + + pw = getpwnam(user); + if (pw == NULL) + errx(1, "unable to lookup user %s", user); + + state->uid = pw->pw_uid; + state->gid = pw->pw_gid; + + endpwent(); + + state->ngroups = nitems(state->groups); + if (getgrouplist(user, pw->pw_gid, + state->groups, &state->ngroups) == -1) + errx(1, "unable to get groups for user %s", user); + } + + return (state); +} + +static int +mode_exec_run(struct kutrace *kut) +{ + struct mode_exec_state *state = kut->mode; + + signal_add(&state->sigchld, NULL); + signal_add(&state->sigint, NULL); + + state->pid = fork(); + switch (state->pid) { + case -1: + err(1, "unable to fork"); + /* NOTREACHED */ + case 0: /* child */ + break; + default: /* parent */ + return (0); + } + + if (state->uid != 0) { + if (setresgid(state->gid, state->gid, state->gid) == -1) + err(1, "setresgid %d", state->gid); + + if (setgroups(state->ngroups, 
state->groups) == -1) + err(1, "setgroups"); + + if (setresuid(state->uid, state->uid, state->uid) == -1) + err(1, "setresuid %d", state->uid); + } + + execvp(state->argv[0], state->argv); + + err(1, "exec %s", state->argv[0]); + return (-1); +}