Index: sys/sys/kstat.h
===================================================================
RCS file: /cvs/src/sys/sys/kstat.h,v
retrieving revision 1.4
diff -u -p -r1.4 kstat.h
--- sys/sys/kstat.h	16 Nov 2023 02:45:20 -0000	1.4
+++ sys/sys/kstat.h	17 Nov 2023 04:01:40 -0000
@@ -93,6 +93,8 @@ enum kstat_kv_unit {
 	KSTAT_KV_U_PACKETS,	/* packets */
 	KSTAT_KV_U_BYTES,	/* bytes */
 	KSTAT_KV_U_CYCLES,	/* cycles */
+	KSTAT_KV_U_INSTR,	/* instructions */
+	KSTAT_KV_U_UJOULES,	/* uJoules */
 };
 
 struct kstat_kv {
Index: sys/arch/amd64/amd64/cpumon.c
===================================================================
RCS file: sys/arch/amd64/amd64/cpumon.c
diff -N sys/arch/amd64/amd64/cpumon.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sys/arch/amd64/amd64/cpumon.c	17 Nov 2023 04:01:41 -0000
@@ -0,0 +1,652 @@
+/*	$OpenBSD$ */
+
+/*
+ * Copyright (c) 2022, 2023 David Gwynne
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "kstat.h"
+
+#if NKSTAT == 0
+#error cpumon(4) requires kstat(4)
+#endif
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#define CPUMON_MSR_AMD_RAPL_PWR_UNIT		0xC0010299
+#define CPUMON_MSR_AMD_RAPL_PWR_UNIT_ESU_SHIFT	8
+#define CPUMON_MSR_AMD_RAPL_PWR_UNIT_ESU_MASK	0x1f
+#define CPUMON_MSR_AMD_CORE_ENERGY_STAT		0xC001029A
+#define CPUMON_MSR_AMD_PKG_ENERGY_STAT		0xC001029B
+
+struct cpumon_rapl {
+	struct cpumon_softc	*rapl_sc;
+	struct cpu_info		*rapl_ci;
+	struct task		 rapl_xcall;
+
+	uint64_t		 rapl_energy;	/* accumulator */
+	uint32_t		 rapl_energy_prev;
+	unsigned int		 rapl_energy_unit;
+	uint32_t		 rapl_energy_msr;
+};
+
+enum cpumon_core_map {
+	CPUMON_CORE_MAP_TSC,
+	CPUMON_CORE_MAP_EFF_PERF,
+	CPUMON_CORE_MAP_IRPERF,
+
+	CPUMON_CORE_MAP_RAPL,
+
+	CPUMON_CORE_MAP_COUNT
+};
+
+struct cpumon_core {
+	struct cpumon_softc	*c_sc;
+	struct cpu_info		*c_ci;
+	struct kstat		*c_ks;
+
+	TAILQ_ENTRY(cpumon_core) c_entry;
+
+	unsigned int		 c_nkvs;
+	int8_t			 c_map[CPUMON_CORE_MAP_COUNT];
+
+	struct cpumon_rapl	 c_rapl;
+};
+
+TAILQ_HEAD(cpumon_cores, cpumon_core);
+
+enum cpumon_pkg_map {
+	CPUMON_PKG_MAP_RAPL,
+
+	CPUMON_PKG_MAP_COUNT
+};
+
+struct cpumon_pkg {
+	struct cpumon_softc	*p_sc;
+	struct cpu_info		*p_ci;
+	struct kstat		*p_ks;
+
+	TAILQ_ENTRY(cpumon_pkg)	 p_entry;
+
+	unsigned int		 p_nkvs;
+	int8_t			 p_map[CPUMON_PKG_MAP_COUNT];
+
+	struct cpumon_rapl	 p_rapl;
+};
+
+TAILQ_HEAD(cpumon_pkgs, cpumon_pkg);
+
+struct cpumon_softc {
+	struct device		 sc_dev;
+	struct task		 sc_deferred;
+
+	struct cpumon_cores	 sc_cores;
+	struct cpumon_pkgs	 sc_pkgs;
+
+	/* used by the ticks below to wait for all cores/pkgs to read stuff */
+	struct refcnt		 sc_rapl_refs;
+
+	struct timeout		 sc_core_rapl_tick;
+	struct timeout		 sc_pkg_rapl_tick;
+};
+
+static int	cpumon_match(struct device *, void *, void *);
+static void	cpumon_attach(struct device *, struct device *, void *);
+
+struct cfdriver cpumon_cd = {
+	NULL, "cpumon", DV_DULL, CD_SKIPHIBERNATE
+};
+
+const struct cfattach cpumon_ca = {
+	sizeof(struct cpumon_softc), cpumon_match, cpumon_attach, NULL, NULL
+};
+
+static void	cpumon_deferred(void *);
+static struct cpumon_core *
+		cpumon_attach_core(struct cpumon_softc *, struct cpu_info *);
+static struct cpumon_pkg *
+		cpumon_attach_pkg(struct cpumon_softc *, struct cpu_info *);
+
+static void	cpumon_core_rapl_tick(void *);
+static void	cpumon_pkg_rapl_tick(void *);
+
+static int
+cpumon_match(struct device *parent, void *match, void *aux)
+{
+	const char **busname = (const char **)aux;
+
+	if (strcmp(*busname, cpumon_cd.cd_name) != 0)
+		return (0);
+
+	return (1);
+}
+
+static void
+cpumon_attach(struct device *parent, struct device *self, void *aux)
+{
+	struct cpumon_softc *sc = (struct cpumon_softc *)self;
+
+	printf("\n");
+
+	task_set(&sc->sc_deferred, cpumon_deferred, sc);
+	TAILQ_INIT(&sc->sc_cores);
+	TAILQ_INIT(&sc->sc_pkgs);
+	timeout_set_proc(&sc->sc_core_rapl_tick, cpumon_core_rapl_tick, sc);
+	timeout_set_proc(&sc->sc_pkg_rapl_tick, cpumon_pkg_rapl_tick, sc);
+
+	task_add(systqmp, &sc->sc_deferred);
+}
+
+static inline uint32_t
+cpumon_rapl_read_msr(const struct cpumon_rapl *rapl)
+{
+	return (rdmsr(rapl->rapl_energy_msr));
+}
+
+static void
+cpumon_rapl_tick(void *arg)
+{
+	struct cpumon_rapl *rapl = arg;
+	struct cpumon_softc *sc = rapl->rapl_sc;
+	uint32_t energy_now;
+	uint32_t diff;
+
+	energy_now = cpumon_rapl_read_msr(rapl);
+	diff = energy_now - rapl->rapl_energy_prev;
+
+	rapl->rapl_energy_prev = energy_now;
+	rapl->rapl_energy += diff;
+
+	refcnt_rele_wake(&sc->sc_rapl_refs);
+}
+
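A note on the tick above: CORE::ENERGY_STAT and PKG::ENERGY_STAT are 32-bit
counters of 1/2^ESU joule units, so they wrap quickly and have to be sampled
more often than the wrap period. Assuming ESU=16 (a common value on Zen
parts), one wrap is 2^32 / 2^16 = 65536 joules, which a package drawing 100 W
burns through in about 11 minutes; the 53 second core tick and 5 second
package tick further down stay well inside that. The accumulation itself is
wrap-safe because the uint32_t subtraction is modulo 2^32. A standalone
sketch of the same technique, with made-up numbers:

	/*
	 * Wrap-safe accumulation as in cpumon_rapl_tick(). Userland
	 * illustration only; the 10 J steps are invented.
	 */
	#include <stdint.h>
	#include <stdio.h>

	static uint64_t acc;
	static uint32_t prev;

	static void
	tick(uint32_t now)
	{
		/* safe as long as the counter advances by < 2^32 between samples */
		acc += now - prev;
		prev = now;
	}

	int
	main(void)
	{
		uint32_t step = 10 << 16;		/* 10 J at ESU=16 */
		uint32_t msr = UINT32_MAX - step;	/* about to wrap */
		int i;

		prev = msr;
		for (i = 0; i < 4; i++) {
			msr += step;	/* wraps past 2^32 on the first lap */
			tick(msr);
		}
		printf("accumulated %llu units (%llu J)\n",
		    (unsigned long long)acc, (unsigned long long)(acc >> 16));
		return (0);
	}

This prints 2621440 units (40 J) even though the raw counter wrapped.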
+static uint64_t
+cpumon_rapl_read(struct cpumon_rapl *rapl, uint32_t energy_now)
+{
+	uint32_t diff = energy_now - rapl->rapl_energy_prev;
+	uint64_t energy = rapl->rapl_energy + diff;
+
+	rapl->rapl_energy_prev = energy_now;
+	rapl->rapl_energy = energy;
+
+	/* XXX energy * 1000000 wraps uint64_t once the accumulator
+	 * passes ~2^44 units; see the note below */
+	return ((energy * 1000000) >> rapl->rapl_energy_unit);
+}
+
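On the XXX above: the accumulator counts 1/2^ESU joule units, so
energy * 1000000 wraps a uint64_t once the count passes 2^64 / 10^6, which is
about 2^44 units. At ESU=16 (15.26 uJ per unit) that is roughly 2.8e8 joules,
or only about 65 days at a steady 50 W, so the concern is real. One possible
rework (a sketch, not part of the diff) is to scale the whole-joule part and
the sub-joule remainder separately, which pushes the wrap out by a factor
of 2^ESU:

	/*
	 * Overflow-resistant microjoule conversion; "unit" is the ESU
	 * field from RAPL_PWR_UNIT, as in cpumon_rapl_read().
	 */
	static uint64_t
	rapl_to_ujoules(uint64_t energy, unsigned int unit)
	{
		uint64_t whole = energy >> unit;		/* whole joules */
		uint64_t frac = energy & ((1ULL << unit) - 1);

		/* frac < 2^unit, so frac * 1000000 cannot overflow for
		 * any value of the 5-bit ESU field (max 31) */
		return (whole * 1000000 + ((frac * 1000000) >> unit));
	}

With this, whole * 1000000 only wraps after ~1.8e13 joules accumulated.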
+static void
+cpumon_probe_core_effperf(struct cpumon_core *c)
+{
+	uint32_t eax, ebx, ecx, edx;
+
+	CPUID(0x06, eax, ebx, ecx, edx);
+
+	/* leaf 6 ecx bit 0: APERF/MPERF are available */
+	if (ecx & (1 << 0)) {
+		c->c_map[CPUMON_CORE_MAP_EFF_PERF] = c->c_nkvs;
+		c->c_nkvs += 2;
+	}
+}
+
+static void
+cpumon_probe_core_intel(struct cpumon_core *c)
+{
+	if (cpuid_level < 0x06)
+		return;
+
+	cpumon_probe_core_effperf(c);
+}
+
+static void
+cpumon_probe_core_amd(struct cpumon_core *c)
+{
+	cpumon_probe_core_effperf(c);
+
+	if (c->c_ci->ci_family >= 0x17) {
+		uint32_t eax, ebx, ecx, edx;
+
+		/* Fn8000_0008 ebx bit 1: IRPERF is available */
+		CPUID(0x80000008, eax, ebx, ecx, edx);
+		if (ebx & (1 << 1)) {
+			c->c_map[CPUMON_CORE_MAP_IRPERF] = c->c_nkvs;
+			c->c_nkvs += 1;
+		}
+
+		/* Fn8000_0007 edx bit 14: RAPL is available */
+		CPUID(0x80000007, eax, ebx, ecx, edx);
+		if (edx & (1 << 14)) {
+			c->c_map[CPUMON_CORE_MAP_RAPL] = c->c_nkvs;
+			c->c_nkvs += 1;
+		}
+	}
+}
+
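The three feature bits probed above are CPUID leaf 0x06 ECX[0] (APERF/MPERF),
Fn8000_0008 EBX[1] (IRPERF), and Fn8000_0007 EDX[14] (RAPL). To check what a
machine advertises before trusting the kstats, a userland spot-check looks
like this (assumes the gcc/clang <cpuid.h> builtin; not part of the diff):

	#include <cpuid.h>
	#include <stdio.h>

	int
	main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		/* leaf 6 ECX[0]: APERF/MPERF */
		if (__get_cpuid(0x06, &eax, &ebx, &ecx, &edx))
			printf("aperf/mperf: %s\n", (ecx & 1) ? "yes" : "no");
		/* Fn8000_0008 EBX[1]: IRPERF */
		if (__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx))
			printf("irperf: %s\n", (ebx & 2) ? "yes" : "no");
		/* Fn8000_0007 EDX[14]: RAPL */
		if (__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx))
			printf("rapl: %s\n", (edx & (1 << 14)) ? "yes" : "no");
		return (0);
	}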
+static void
+cpumon_deferred(void *arg)
+{
+	struct cpumon_softc *sc = arg;
+	struct cpu_info *ci;
+	CPU_INFO_ITERATOR cii;
+	struct cpumon_core *c;
+	int rapl = 0;
+
+	CPU_INFO_FOREACH(cii, ci) {
+		sched_peg_curproc(ci);
+
+		c = cpumon_attach_core(sc, ci);
+		if (c && c->c_map[CPUMON_CORE_MAP_RAPL])
+			rapl = 1;
+
+		cpumon_attach_pkg(sc, ci);
+	}
+
+	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
+
+	if (rapl)
+		timeout_add_sec(&sc->sc_core_rapl_tick, 53);
+
+	if (!TAILQ_EMPTY(&sc->sc_pkgs))
+		timeout_add_sec(&sc->sc_pkg_rapl_tick, 5);
+}
+
+static void
+cpumon_core_rapl_tick(void *arg)
+{
+	struct cpumon_softc *sc = arg;
+	struct cpumon_core *c;
+
+	refcnt_init(&sc->sc_rapl_refs);
+
+	TAILQ_FOREACH(c, &sc->sc_cores, c_entry) {
+		if (c->c_map[CPUMON_CORE_MAP_RAPL] == 0) {
+			/* is this even possible? */
+			continue;
+		}
+
+		refcnt_take(&sc->sc_rapl_refs);
+		cpu_xcall(c->c_ci, &c->c_rapl.rapl_xcall);
+	}
+
+	refcnt_finalize(&sc->sc_rapl_refs, "cpurapl");
+
+	/* this doesn't have to be accurate */
+	timeout_add_sec(&sc->sc_core_rapl_tick, 53);
+}
+
+static void
+cpumon_pkg_rapl_tick(void *arg)
+{
+	struct cpumon_softc *sc = arg;
+	struct cpumon_pkg *p;
+
+	refcnt_init(&sc->sc_rapl_refs);
+
+	TAILQ_FOREACH(p, &sc->sc_pkgs, p_entry) {
+		if (p->p_map[CPUMON_PKG_MAP_RAPL] == 0) {
+			/* is this even possible? */
+			continue;
+		}
+
+		refcnt_take(&sc->sc_rapl_refs);
+		cpu_xcall(p->p_ci, &p->p_rapl.rapl_xcall);
+	}
+
+	refcnt_finalize(&sc->sc_rapl_refs, "pkgrapl");
+
+	/* this doesn't have to be accurate */
+	timeout_add_sec(&sc->sc_pkg_rapl_tick, 5);
+}
+
+struct cpumon_xcall {
+	struct kstat	*cx_ks;
+	struct cond	 cx_c;
+};
+
+static void
+cpumon_read_core_xcall(void *arg)
+{
+	struct cpumon_xcall *cx = arg;
+	struct kstat *ks = cx->cx_ks;
+	struct kstat_kv *kvs = ks->ks_data;
+	struct cpumon_core *c = ks->ks_softc;
+	unsigned long s;
+	uint32_t energy_now;
+	int idx, rapl;
+
+	s = intr_disable();
+	idx = c->c_map[CPUMON_CORE_MAP_TSC];
+	if (idx)
+		kstat_kv_u64(&kvs[idx]) = rdtsc_lfence();
+
+	idx = c->c_map[CPUMON_CORE_MAP_EFF_PERF];
+	if (idx) {
+		kstat_kv_u64(&kvs[idx + 0]) = rdmsr(0xe8);	/* APERF */
+		kstat_kv_u64(&kvs[idx + 1]) = rdmsr(0xe7);	/* MPERF */
+	}
+
+	idx = c->c_map[CPUMON_CORE_MAP_IRPERF];
+	if (idx)
+		kstat_kv_u64(&kvs[idx]) = rdmsr(0xc00000e9);	/* IRPerfCount */
+
+	rapl = c->c_map[CPUMON_CORE_MAP_RAPL];
+	if (rapl)
+		energy_now = cpumon_rapl_read_msr(&c->c_rapl);
+
+	nanouptime(&ks->ks_updated);
+	intr_restore(s);
+
+	if (rapl) {
+		kstat_kv_u64(&kvs[rapl]) =
+		    cpumon_rapl_read(&c->c_rapl, energy_now);
+	}
+
+	cond_signal(&cx->cx_c);
+}
+
+static int
+cpumon_read_core(struct kstat *ks)
+{
+	struct timespec now, diff;
+
+	/* rate limit the updates to roughly twice a second */
+	getnanouptime(&now);
+	timespecsub(&now, &ks->ks_updated, &diff);
+	if (diff.tv_sec > 0 || diff.tv_nsec > 500000000) {
+		struct cpumon_xcall cx = { ks, COND_INITIALIZER() };
+		struct task t = TASK_INITIALIZER(cpumon_read_core_xcall, &cx);
+		struct cpumon_core *c = ks->ks_softc;
+
+		cpu_xcall(c->c_ci, &t);
+
+		cond_wait(&cx.cx_c, "cpumonc");
+	}
+
+	return (0);
+}
+
+static struct cpumon_core *
+cpumon_attach_core(struct cpumon_softc *sc, struct cpu_info *ci)
+{
+	struct kstat *ks;
+	struct kstat_kv *kvs;
+	struct cpumon_core *c;
+	int idx;
+
+	TAILQ_FOREACH(c, &sc->sc_cores, c_entry) {
+		if (ci->ci_pkg_id == c->c_ci->ci_pkg_id &&
+		    ci->ci_core_id == c->c_ci->ci_core_id) {
+			/* core is already being monitored */
+
+			if (ci->ci_smt_id < c->c_ci->ci_smt_id) {
+				/* prefer low threads */
+				c->c_ci = ci;
+			}
+
+			return (NULL);
+		}
+	}
+
+	ks = kstat_create("cpu-core", ci->ci_pkg_id << 24 | ci->ci_core_id,
+	    "cpumon", 0, KSTAT_T_KV, 0);
+	if (ks == NULL) {
+		printf("unable to create cpu-core kstat for pkg %u core %u\n",
+		    ci->ci_pkg_id, ci->ci_core_id);
+		return (NULL);
+	}
+
+	c = malloc(sizeof(*c), M_DEVBUF, M_WAITOK|M_ZERO);
+	c->c_sc = sc;
+	c->c_ci = ci;
+	c->c_ks = ks;
+	c->c_nkvs = 2;	/* pkg and core ids */
+
+	/* assume we have tsc */
+	c->c_map[CPUMON_CORE_MAP_TSC] = c->c_nkvs;
+	c->c_nkvs += 1;
+
+	if (strcmp(cpu_vendor, "GenuineIntel") == 0)
+		cpumon_probe_core_intel(c);
+	else if (strcmp(cpu_vendor, "AuthenticAMD") == 0)
+		cpumon_probe_core_amd(c);
+
+	kvs = mallocarray(c->c_nkvs, sizeof(*kvs), M_DEVBUF, M_WAITOK|M_ZERO);
+
+	kstat_kv_init(&kvs[0], "package", KSTAT_KV_T_UINT32);
+	kstat_kv_u32(&kvs[0]) = ci->ci_pkg_id;
+	kstat_kv_init(&kvs[1], "core", KSTAT_KV_T_UINT32);
+	kstat_kv_u32(&kvs[1]) = ci->ci_core_id;
+
+	idx = c->c_map[CPUMON_CORE_MAP_TSC];
+	if (idx) {
+		kstat_kv_unit_init(&kvs[idx], "tsc",
+		    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_CYCLES);
+	}
+
+	idx = c->c_map[CPUMON_CORE_MAP_EFF_PERF];
+	if (idx) {
+		kstat_kv_init(&kvs[idx + 0], "aperf", KSTAT_KV_T_COUNTER64);
+		kstat_kv_init(&kvs[idx + 1], "mperf", KSTAT_KV_T_COUNTER64);
+	}
+
+	idx = c->c_map[CPUMON_CORE_MAP_IRPERF];
+	if (idx) {
+		uint64_t msr;
+
+		/* set HWCR.IRPerfEn to enable the counter */
+		msr = rdmsr(0xC0010015);
+		SET(msr, (1 << 30));
+		wrmsr(0xC0010015, msr);
+
+		kstat_kv_unit_init(&kvs[idx], "irperf",
+		    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_INSTR);
+	}
+
+	idx = c->c_map[CPUMON_CORE_MAP_RAPL];
+	if (idx) {
+		uint64_t rapl_pwr_unit;
+		unsigned int unit;
+
+		rapl_pwr_unit = rdmsr(CPUMON_MSR_AMD_RAPL_PWR_UNIT);
+		unit = rapl_pwr_unit >> CPUMON_MSR_AMD_RAPL_PWR_UNIT_ESU_SHIFT;
+		unit &= CPUMON_MSR_AMD_RAPL_PWR_UNIT_ESU_MASK;
+
+		c->c_rapl.rapl_sc = sc;
+		task_set(&c->c_rapl.rapl_xcall, cpumon_rapl_tick, &c->c_rapl);
+
+		c->c_rapl.rapl_energy_msr = CPUMON_MSR_AMD_CORE_ENERGY_STAT;
+		c->c_rapl.rapl_energy_prev = rdmsr(c->c_rapl.rapl_energy_msr);
+		c->c_rapl.rapl_energy_unit = unit;
+
+		kstat_kv_unit_init(&kvs[idx], "energy",
+		    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_UJOULES);
+	}
+
+	ks->ks_data = kvs;
+	ks->ks_datalen = c->c_nkvs * sizeof(*kvs);
+	ks->ks_read = cpumon_read_core;
+	ks->ks_softc = c;
+
+	kstat_install(ks);
+
+	TAILQ_INSERT_TAIL(&sc->sc_cores, c, c_entry);
+
+	return (c);
+}
+
+static void
+cpumon_read_pkg_xcall(void *arg)
+{
+	struct cpumon_xcall *cx = arg;
+	struct kstat *ks = cx->cx_ks;
+	struct kstat_kv *kvs = ks->ks_data;
+	struct cpumon_pkg *p = ks->ks_softc;
+	unsigned long s;
+	uint32_t energy_now;
+	int rapl;
+
+	s = intr_disable();
+	rapl = p->p_map[CPUMON_PKG_MAP_RAPL];
+	if (rapl)
+		energy_now = cpumon_rapl_read_msr(&p->p_rapl);
+
+	nanouptime(&ks->ks_updated);
+	intr_restore(s);
+
+	if (rapl) {
+		kstat_kv_u64(&kvs[rapl]) =
+		    cpumon_rapl_read(&p->p_rapl, energy_now);
+	}
+
+	cond_signal(&cx->cx_c);
+}
+
+static int
+cpumon_read_pkg(struct kstat *ks)
+{
+	struct timespec now, diff;
+
+	/* rate limit the updates to roughly twice a second */
+	getnanouptime(&now);
+	timespecsub(&now, &ks->ks_updated, &diff);
+	if (diff.tv_sec > 0 || diff.tv_nsec > 500000000) {
+		struct cpumon_xcall cx = { ks, COND_INITIALIZER() };
+		struct task t = TASK_INITIALIZER(cpumon_read_pkg_xcall, &cx);
+		struct cpumon_pkg *p = ks->ks_softc;
+
+		cpu_xcall(p->p_ci, &t);
+
+		cond_wait(&cx.cx_c, "cpumonp");
+	}
+
+	return (0);
+}
+
+static int
+cpumon_probe_pkg_intel(struct cpumon_pkg *p)
+{
+	/* cpu temperature? rapl? */
+	return (0);
+}
+
+static int
+cpumon_probe_pkg_amd(struct cpumon_pkg *p)
+{
+	int rv = 0;
+
+	if (p->p_ci->ci_family >= 0x17) {
+		uint32_t eax, ebx, ecx, edx;
+
+		/* Fn8000_0007 edx bit 14: RAPL is available */
+		CPUID(0x80000007, eax, ebx, ecx, edx);
+		if (edx & (1 << 14)) {
+			p->p_map[CPUMON_PKG_MAP_RAPL] = p->p_nkvs;
+			p->p_nkvs += 1;
+
+			rv = 1;
+		}
+	}
+
+	return (rv);
+}
+
+static struct cpumon_pkg *
+cpumon_attach_pkg(struct cpumon_softc *sc, struct cpu_info *ci)
+{
+	struct kstat *ks;
+	struct kstat_kv *kvs;
+	struct cpumon_pkg *p;
+	int idx;
+	int rv = 0;
+
+	TAILQ_FOREACH(p, &sc->sc_pkgs, p_entry) {
+		if (ci->ci_pkg_id == p->p_ci->ci_pkg_id) {
+			/* pkg is already being monitored */
+			return (NULL);
+		}
+	}
+
+	p = malloc(sizeof(*p), M_DEVBUF, M_WAITOK|M_ZERO);
+	p->p_sc = sc;
+	p->p_ci = ci;
+	p->p_nkvs = 1;	/* pkg id */
+
+	if (strcmp(cpu_vendor, "GenuineIntel") == 0)
+		rv = cpumon_probe_pkg_intel(p);
+	else if (strcmp(cpu_vendor, "AuthenticAMD") == 0)
+		rv = cpumon_probe_pkg_amd(p);
+
+	if (rv == 0) {
+		free(p, M_DEVBUF, sizeof(*p));
+		return (NULL);
+	}
+
+	ks = kstat_create("cpu-pkg", ci->ci_pkg_id, "cpumon", 0,
+	    KSTAT_T_KV, 0);
+	if (ks == NULL) {
+		printf("unable to create cpu-pkg kstat for pkg %u\n",
+		    ci->ci_pkg_id);
+		free(p, M_DEVBUF, sizeof(*p));
+		return (NULL);
+	}
+
+	p->p_ks = ks;
+
+	kvs = mallocarray(p->p_nkvs, sizeof(*kvs), M_DEVBUF, M_WAITOK|M_ZERO);
+
+	kstat_kv_init(&kvs[0], "package", KSTAT_KV_T_UINT32);
+	kstat_kv_u32(&kvs[0]) = ci->ci_pkg_id;
+
+	idx = p->p_map[CPUMON_PKG_MAP_RAPL];
+	if (idx) {
+		uint64_t rapl_pwr_unit;
+		unsigned int unit;
+
+		rapl_pwr_unit = rdmsr(CPUMON_MSR_AMD_RAPL_PWR_UNIT);
+		unit = rapl_pwr_unit >> CPUMON_MSR_AMD_RAPL_PWR_UNIT_ESU_SHIFT;
+		unit &= CPUMON_MSR_AMD_RAPL_PWR_UNIT_ESU_MASK;
+
+		p->p_rapl.rapl_sc = sc;
+		task_set(&p->p_rapl.rapl_xcall, cpumon_rapl_tick, &p->p_rapl);
+
+		p->p_rapl.rapl_energy_msr = CPUMON_MSR_AMD_PKG_ENERGY_STAT;
+		p->p_rapl.rapl_energy_prev = rdmsr(p->p_rapl.rapl_energy_msr);
+		p->p_rapl.rapl_energy_unit = unit;
+
+		kstat_kv_unit_init(&kvs[idx], "energy",
+		    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_UJOULES);
+	}
+
+	ks->ks_data = kvs;
+	ks->ks_datalen = p->p_nkvs * sizeof(*kvs);
+	ks->ks_read = cpumon_read_pkg;
+	ks->ks_softc = p;
+
+	kstat_install(ks);
+
+	TAILQ_INSERT_TAIL(&sc->sc_pkgs, p, p_entry);
+
+	return (p);
+}
Index: sys/arch/amd64/amd64/intr.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/intr.c,v
retrieving revision 1.55
diff -u -p -r1.55 intr.c
--- sys/arch/amd64/amd64/intr.c	28 Dec 2020 14:23:30 -0000	1.55
+++ sys/arch/amd64/amd64/intr.c	17 Nov 2023 04:01:41 -0000
@@ -552,7 +552,10 @@ struct intrhand fake_softclock_intrhand;
 struct intrhand fake_softnet_intrhand;
 struct intrhand fake_softtty_intrhand;
 struct intrhand fake_timer_intrhand;
+#ifdef MULTIPROCESSOR
 struct intrhand fake_ipi_intrhand;
+struct intrhand fake_xcall_intrhand;
+#endif
 #if NXEN > 0
 struct intrhand fake_xen_intrhand;
 #endif
@@ -619,6 +622,17 @@ cpu_intr_init(struct cpu_info *ci)
 	isp->is_handlers = &fake_ipi_intrhand;
 	isp->is_pic = &local_pic;
 	ci->ci_isources[LIR_IPI] = isp;
+
+	isp = malloc(sizeof (struct intrsource), M_DEVBUF, M_NOWAIT|M_ZERO);
+	if (isp == NULL)
+		panic("can't allocate fixed interrupt source");
+	isp->is_recurse = Xxcallintr;
+	isp->is_resume = Xxcallintr;
+	fake_xcall_intrhand.ih_level = IPL_SOFTCLOCK;
+	fake_xcall_intrhand.ih_flags = IPL_MPSAFE;
+	isp->is_handlers = &fake_xcall_intrhand;
+	isp->is_pic = &local_pic;
+	ci->ci_isources[SIR_XCALL] = isp;
 #endif
 #if NXEN > 0
 	isp = malloc(sizeof (struct intrsource), M_DEVBUF, M_NOWAIT|M_ZERO);
Index: sys/arch/amd64/amd64/ipifuncs.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/ipifuncs.c,v
retrieving revision 1.37
diff -u -p -r1.37 ipifuncs.c
--- sys/arch/amd64/amd64/ipifuncs.c	7 Aug 2022 23:56:06 -0000	1.37
+++ sys/arch/amd64/amd64/ipifuncs.c	17 Nov 2023 04:01:41 -0000
@@ -61,6 +61,7 @@ void x86_64_ipi_nop(struct cpu_info *);
 void x86_64_ipi_halt(struct cpu_info *);
 
 void x86_64_ipi_wbinvd(struct cpu_info *);
+void x86_64_ipi_xcall(struct cpu_info *);
 
 #if NVMM > 0
 void x86_64_ipi_vmclear_vmm(struct cpu_info *);
@@ -108,6 +109,7 @@ void (*ipifunc[X86_NIPI])(struct cpu_inf
 	NULL,
 #endif
 	x86_64_ipi_wbinvd,
+	x86_64_ipi_xcall,
 };
 
 void
@@ -166,3 +168,13 @@ x86_64_ipi_wbinvd(struct cpu_info *ci)
 {
 	wbinvd();
 }
+
+void
+x86_64_ipi_xcall(struct cpu_info *ci)
+{
+	/*
+	 * this is an inlining of softintr() because we already have
+	 * curcpu() and the SIR_XCALL bit to set.
+	 */
+	x86_atomic_setbits_u64(&ci->ci_ipending, 1UL << SIR_XCALL);
+}
Index: sys/arch/amd64/amd64/mainbus.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/mainbus.c,v
retrieving revision 1.52
diff -u -p -r1.52 mainbus.c
--- sys/arch/amd64/amd64/mainbus.c	21 Feb 2022 11:03:39 -0000	1.52
+++ sys/arch/amd64/amd64/mainbus.c	17 Nov 2023 04:01:41 -0000
@@ -49,6 +49,7 @@
 #include "bios.h"
 #include "mpbios.h"
 #include "vmm.h"
+#include "cpumon.h"
 #include "pvbus.h"
 #include "efifb.h"
 
@@ -253,6 +254,13 @@ mainbus_attach(struct device *parent, st
 	if (isa_has_been_seen == 0)
 		config_found(self, &mba_iba, mainbus_print);
 #endif
+
+#if NCPUMON > 0
+	if (ISSET(cpu_info_primary.ci_feature_flags, CPUID_TSC)) {
+		mba.mba_busname = "cpumon";
+		config_found(self, &mba.mba_busname, mainbus_print);
+	}
+#endif /* NCPUMON > 0 */
 
 #if NVMM > 0
 	if (vmm_enabled()) {
Index: sys/arch/amd64/amd64/softintr.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/softintr.c,v
retrieving revision 1.10
diff -u -p -r1.10 softintr.c
--- sys/arch/amd64/amd64/softintr.c	11 Sep 2020 09:27:09 -0000	1.10
+++ sys/arch/amd64/amd64/softintr.c	17 Nov 2023 04:01:41 -0000
@@ -38,6 +38,9 @@
 #include 
 #include 
 
+#include 
+#include 
+
 #include 
 #include 
 
@@ -169,3 +172,58 @@ softintr_disestablish(void *arg)
 
 	free(sih, M_DEVBUF, sizeof(*sih));
 }
+
+void
+#ifdef MULTIPROCESSOR
+cpu_xcall_self(struct task *t)
+#else
+cpu_xcall(struct cpu_info *ci, struct task *t)
+#endif
+{
+	int s = splsoftclock();
+	(*t->t_func)(t->t_arg);
+	splx(s);
+}
+
+#ifdef MULTIPROCESSOR
+void
+cpu_xcall(struct cpu_info *ci, struct task *t)
+{
+	size_t i;
+
+	if (ci == curcpu()) {
+		/* execute the task immediately on the local cpu */
+		cpu_xcall_self(t);
+		return;
+	}
+
+	for (;;) {
+		for (i = 0; i < nitems(ci->ci_xcalls); i++) {
+			if (atomic_cas_ptr(&ci->ci_xcalls[i],
+			    NULL, t) == NULL) {
+				/* membar_producer(); */
+				x86_send_ipi(ci, X86_IPI_XCALL);
+				return;
+			}
+		}
+
+		CPU_BUSY_CYCLE();
+	}
+}
+
+void
+cpu_xcall_dispatch(void)
+{
+	struct cpu_info *ci = curcpu();
+	struct task *t;
+	size_t i;
+
+	for (i = 0; i < nitems(ci->ci_xcalls); i++) {
+		t = ci->ci_xcalls[i];
+		if (t != NULL) {
+			ci->ci_xcalls[i] = NULL;
+			(*t->t_func)(t->t_arg);
+		}
+	}
+}
+#endif /* MULTIPROCESSOR */
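cpu_xcall() above gives each cpu a four-slot mailbox: the caller CASes a task
pointer into a free slot and kicks the target with X86_IPI_XCALL, the IPI
handler marks SIR_XCALL pending, and the soft interrupt then runs
cpu_xcall_dispatch() at IPL_SOFTCLOCK. If every slot is busy the caller spins
with CPU_BUSY_CYCLE() until one frees up. A minimal consumer looks like the
cpumon read path above; spelled out on its own (the example_* names are made
up, everything else is from this diff):

	struct example_xcall {
		uint64_t	msr_value;
		struct cond	done;
	};

	static void
	example_read_msr(void *arg)
	{
		struct example_xcall *ex = arg;

		/* runs on the target cpu, from the SIR_XCALL soft interrupt */
		ex->msr_value = rdmsr(0xe7);	/* MPERF, for example */
		cond_signal(&ex->done);
	}

	static uint64_t
	example_remote_mperf(struct cpu_info *ci)
	{
		struct example_xcall ex = { 0, COND_INITIALIZER() };
		struct task t = TASK_INITIALIZER(example_read_msr, &ex);

		cpu_xcall(ci, &t);	/* runs inline if ci == curcpu() */
		cond_wait(&ex.done, "exmsr");

		return (ex.msr_value);
	}

Because cond_wait() sleeps, this pattern is only usable from process context;
the xcall function itself runs in soft interrupt context and must not sleep.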
Index: sys/arch/amd64/amd64/vector.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vector.S,v
retrieving revision 1.94
diff -u -p -r1.94 vector.S
--- sys/arch/amd64/amd64/vector.S	31 Jul 2023 04:01:07 -0000	1.94
+++ sys/arch/amd64/amd64/vector.S	17 Nov 2023 04:01:41 -0000
@@ -1312,3 +1312,12 @@ KIDTVEC(softclock)
 	jmp	retpoline_r13
 	CODEPATCH_END(CPTAG_RETPOLINE_R13)
 END(Xsoftclock)
+
+KIDTVEC(xcallintr)
+	movl	$IPL_SOFTCLOCK, CPUVAR(ILEVEL)
+	sti
+	incl	CPUVAR(IDEPTH)
+	call	_C_LABEL(cpu_xcall_dispatch)
+	decl	CPUVAR(IDEPTH)
+	jmp	retpoline_r13
+END(Xxcallintr)
Index: sys/arch/amd64/conf/GENERIC
===================================================================
RCS file: /cvs/src/sys/arch/amd64/conf/GENERIC,v
retrieving revision 1.518
diff -u -p -r1.518 GENERIC
--- sys/arch/amd64/conf/GENERIC	8 Jul 2023 02:43:02 -0000	1.518
+++ sys/arch/amd64/conf/GENERIC	17 Nov 2023 04:01:41 -0000
@@ -35,6 +35,7 @@ isa0 at amdpcib?
 isa0	at tcpcib?
 pci*	at mainbus0
 vmm0	at mainbus0
+cpumon0	at mainbus0
 pvbus0	at mainbus0
 
 acpi0	at bios0
Index: sys/arch/amd64/conf/files.amd64
===================================================================
RCS file: /cvs/src/sys/arch/amd64/conf/files.amd64,v
retrieving revision 1.109
diff -u -p -r1.109 files.amd64
--- sys/arch/amd64/conf/files.amd64	8 Jul 2023 08:01:10 -0000	1.109
+++ sys/arch/amd64/conf/files.amd64	17 Nov 2023 04:01:41 -0000
@@ -260,6 +260,13 @@ file arch/amd64/amd64/vmm_machdep.c vmm
 file	arch/amd64/amd64/vmm_support.S		vmm
 
 #
+# MSR kstats
+#
+device cpumon {}
+attach cpumon at mainbus
+file	arch/amd64/amd64/cpumon.c		cpumon needs-flag
+
+#
 # Machine-independent SD/MMC drivers
 #
 include "dev/sdmmc/files.sdmmc"
Index: sys/arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.158
diff -u -p -r1.158 cpu.h
--- sys/arch/amd64/include/cpu.h	27 Jul 2023 00:28:24 -0000	1.158
+++ sys/arch/amd64/include/cpu.h	17 Nov 2023 04:01:41 -0000
@@ -92,6 +92,11 @@ union vmm_cpu_cap {
 };
 
 /*
+ * for xcalls
+ */
+struct task;
+
+/*
  * Locks used to protect struct members in this file:
  *	I	immutable after creation
  *	a	atomic operations
@@ -199,6 +204,7 @@ struct cpu_info {
 
 #ifdef MULTIPROCESSOR
 	struct srp_hazard	ci_srp_hazards[SRP_HAZARD_NUM];
+	struct task		*ci_xcalls[4];
 #endif
 
 	struct ksensordev	ci_sensordev;
Index: sys/arch/amd64/include/intr.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/intr.h,v
retrieving revision 1.33
diff -u -p -r1.33 intr.h
--- sys/arch/amd64/include/intr.h	14 Dec 2021 18:16:14 -0000	1.33
+++ sys/arch/amd64/include/intr.h	17 Nov 2023 04:01:41 -0000
@@ -207,6 +207,9 @@ void cpu_intr_init(struct cpu_info *);
 void	intr_printconfig(void);
 void	intr_barrier(void *);
 
+struct task;
+void	cpu_xcall(struct cpu_info *ci, struct task *);
+
 #ifdef MULTIPROCESSOR
 void	x86_send_ipi(struct cpu_info *, int);
 int	x86_fast_ipi(struct cpu_info *, int);
@@ -215,6 +218,8 @@ void x86_ipi_handler(void);
 void	x86_setperf_ipi(struct cpu_info *);
 
 extern void (*ipifunc[X86_NIPI])(struct cpu_info *);
+
+extern void Xxcallintr(void);
 #endif
 
 #endif /* !_LOCORE */
Index: sys/arch/amd64/include/intrdefs.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/intrdefs.h,v
retrieving revision 1.22
diff -u -p -r1.22 intrdefs.h
--- sys/arch/amd64/include/intrdefs.h	31 Aug 2021 17:40:59 -0000	1.22
+++ sys/arch/amd64/include/intrdefs.h	17 Nov 2023 04:01:41 -0000
@@ -54,9 +54,10 @@
 #define SIR_CLOCK	61
 #define SIR_NET		60
 #define SIR_TTY		59
+#define SIR_XCALL	58
 
-#define LIR_XEN		58
-#define LIR_HYPERV	57
+#define LIR_XEN		57
+#define LIR_HYPERV	56
 
 /*
  * Maximum # of interrupt sources per CPU. 64 to fit in one word.
@@ -84,8 +85,9 @@
 #define X86_IPI_START_VMM	0x00000100
 #define X86_IPI_STOP_VMM	0x00000200
 #define X86_IPI_WBINVD		0x00000400
+#define X86_IPI_XCALL		0x00000800
 
-#define X86_NIPI	12
+#define X86_NIPI	13
 
 #define IREENT_MAGIC	0x18041969
 
Index: usr.bin/kstat/kstat.c
===================================================================
RCS file: /cvs/src/usr.bin/kstat/kstat.c,v
retrieving revision 1.13
diff -u -p -r1.13 kstat.c
--- usr.bin/kstat/kstat.c	16 Nov 2023 03:17:34 -0000	1.13
+++ usr.bin/kstat/kstat.c	17 Nov 2023 04:01:41 -0000
@@ -490,6 +490,12 @@ kstat_kv(const void *d, ssize_t len)
 	case KSTAT_KV_U_CYCLES:
 		printf(" cycles");
 		break;
+	case KSTAT_KV_U_INSTR:
+		printf(" instructions");
+		break;
+	case KSTAT_KV_U_UJOULES:
+		printf(" micro-joules");
+		break;
 	default:
 		printf(" unit-type-%u", kv->kv_unit);
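For a feel of the result, kstat(1) output on an AMD machine with this diff
applied should look roughly like the following. The kv names and units come
from the patch, but the numbers and the exact formatting are invented for
illustration:

	$ kstat cpu-core:*
	cpumon0:0:cpu-core:0
	        package: 0
	           core: 0
	            tsc: 170237189801 cycles
	          aperf: 58523174391
	          mperf: 49273615523
	         irperf: 36718294512 instructions
	         energy: 18837416 micro-joules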