/*
 * Userland harness that dumps a few x86 CPUID leaves and derives the
 * SMT/core/package ids of the CPU it runs on.
 */

#include <sys/types.h>

#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Execute CPUID with EAX = code and store the four result registers in the
 * given lvalues.  ECX is not loaded, so use this only for leaves that do not
 * take a sub-leaf.
 */
#define CPUID(code, eax, ebx, ecx, edx)					\
	__asm volatile("cpuid"						\
	    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)		\
	    : "a" (code))

/* Same as CPUID(), but also loads ECX with the sub-leaf index. */
#define CPUID_LEAF(code, leaf, eax, ebx, ecx, edx)			\
	__asm volatile("cpuid"						\
	    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)		\
	    : "a" (code), "c" (leaf))

/* Non-zero if any bit of mask _m is set in _v. */
#define ISSET(_v, _m)	((_v) & (_m))

/* The four registers returned by one CPUID invocation. */
struct x86_gprs {
	uint32_t eax;
	uint32_t ebx;
	uint32_t ecx;
	uint32_t edx;
};

/*
 * Run CPUID for function `code', sub-leaf `leaf', and store the resulting
 * registers in *gprs.
 */
void
cpuid(struct x86_gprs *gprs, uint32_t code, uint32_t leaf)
{
	uint32_t eax, ebx, ecx, edx;

	CPUID_LEAF(code, leaf, eax, ebx, ecx, edx);

	gprs->eax = eax;
	gprs->ebx = ebx;
	gprs->ecx = ecx;
	gprs->edx = edx;
}

#if 1
/*
 * Map a byte to something safe to print: NUL becomes '_', any other
 * non-printable value becomes '~', everything else is returned unchanged.
 */
static int
printable(int ch)
{
	if (ch == '\0')
		return ('_');
	if (!isprint(ch))
		return ('~');
	return (ch);
}

/*
 * Print datalen bytes starting at d, 16 per row: the decimal offset, the
 * bytes in hex, then the same bytes as characters between '|' markers.
 */
static void
hexdump(const void *d, size_t datalen)
{
	const uint8_t *data = d;
	size_t i, j = 0;

	/* j is the number of bytes consumed by the row just printed. */
	for (i = 0; i < datalen; i += j) {
		printf("%zu: ", i);
		for (j = 0; j < 16 && i + j < datalen; j++)
			printf("%02x ", data[i + j]);
		/*
		 * Pad a short final row.  Each byte above occupies three
		 * columns ("%02x "), so each missing byte needs three spaces
		 * to keep the character column aligned.
		 */
		while (j++ < 16)
			printf("   ");
		printf("|");
		for (j = 0; j < 16 && i + j < datalen; j++)
			putchar(printable(data[i + j]));
		printf("|\n");
	}
}
#endif

/* Per-CPU identification and topology state filled in by this program. */
struct cpu_info {
	unsigned int	 ci_cpuid;		/* number used in "cpuN:" output */
	uint32_t	 ci_family;		/* family, incl. extended family */
	uint32_t	 ci_model;		/* model, incl. extended model */
	uint32_t	 ci_signature;		/* CPUID 1 EAX */
	uint32_t	 ci_feature_flags;	/* CPUID 1 EDX */
	uint32_t	 ci_pnfeatset;		/* CPUID 0x80000000 EAX */

	uint32_t	 ci_smt_id;
	uint32_t	 ci_core_id;
	uint32_t	 ci_pkg_id;

	struct cpu_info	*ci_next;		/* next entry of cpu_info_list */
};

struct cpu_info cpu_info_primary = { .ci_next = NULL };
struct cpu_info *cpu_info_list = &cpu_info_primary;

#define CPU_INFO_ITERATOR		int
#define CPU_INFO_FOREACH(cii, ci)	for (cii = 0, ci = cpu_info_list; \
					    ci != NULL; ci = ci->ci_next)
/*
 * Not expanded anywhere in this file; struct cpu_info above has no ci_dev
 * member, so using it here would not compile.
 */
#define CPU_INFO_UNIT(ci)	((ci)->ci_dev ? (ci)->ci_dev->dv_unit : 0)

int cpuid_level;	/* CPUID 0 EAX, set by main() */
char cpu_vendor[16];	/* NUL-terminated vendor string, set by main() */

#if 1
#ifndef SMALL_KERNEL
/*
 * Base 2 logarithm of an unsigned int, rounded down.  Returns 0 for 0.
 *
 * Named ilog2 rather than log2 so it cannot clash with the C99 <math.h>
 * function (and the compiler builtin) of that name.
 */
static int
ilog2(unsigned int i)
{
	int ret = 0;

	while (i >>= 1)
		ret++;

	return (ret);
}

/*
 * Number of bits needed to represent the values 0 .. x-1, i.e.
 * ceil(log2(x)).  Returns 0 for x <= 1.
 *
 * Examples: 1 -> 0, 2 -> 1, 3 -> 2, 4 -> 2, 5 -> 3, 8 -> 3.
 */
static int
mask_width(unsigned int x)
{
	int bits = 0;

	if (x <= 1)
		return (0);

	/* The bit length of x - 1 equals ceil(log2(x)) for every x >= 2. */
	for (x -= 1; x != 0; x >>= 1)
		bits++;

	return (bits);
}

#define DEBUG
/*
 * Derive smt/core/package ids for the family 17h path of cpu_topology().
 *
 * Reads CPUID leaves 0x80000008, 0x00000001 and 0x8000001e on the calling
 * CPU; the caller must make sure those leaves are available.  While DEBUG
 * is defined the decoded fields of each leaf are printed as well.
 */
static void
cpu_topology_amd_zen(struct cpu_info *ci)
{
	uint32_t eax, ebx, ecx, edx;
	uint32_t apicidsize, nthreads = 1;
	uint32_t lapicid;
	uint32_t coreid;

	CPUID(0x80000008, eax, ebx, ecx, edx);
	apicidsize = (ecx >> 12) & 0xf;
#ifdef DEBUG
	/*
	 * ECX [Size Identifiers] (Core::X86::Cpuid::SizeId)
	 */
	printf("cpu%d: CPUID_Fn%08x_ECX NC %u ApicIdSize %u\n",
	    ci->ci_cpuid, 0x80000008,
	    (ecx & 0xff) + 1, apicidsize);
#endif

	CPUID(0x00000001, eax, ebx, ecx, edx);
	lapicid = (ebx >> 24) & 0xff;
#ifdef DEBUG
	/*
	 * EBX [LocalApicId, LogicalProcessorCount, CLFlush]
	 * (Core::X86::Cpuid::FeatureIdEbx)
	 */
	printf("cpu%d: CPUID_Fn%08x_EBX "
	    "LocalApicId %u LogicalProcessorCount %u\n",
	    ci->ci_cpuid, 0x00000001,
	    lapicid, (ebx >> 16) & 0xff);
	printf("cpu%d: CPUID_Fn%08x_EDX HTT %u\n",
	    ci->ci_cpuid, 0x00000001,
	    !!ISSET(edx, 1 << 28));
#endif

	CPUID(0x8000001e, eax, ebx, ecx, edx);
	nthreads = ((ebx >> 8) & 0xff) + 1;
#ifdef DEBUG
	/*
	 * EAX [Extended APIC ID] (Core::X86::Cpuid::ExtApicId)
	 */
	printf("cpu%d: CPUID_Fn%08x_EAX ExtendedApicId %u\n",
	    ci->ci_cpuid, 0x8000001e,
	    eax);
	/*
	 * EBX [Core Identifiers] (Core::X86::Cpuid::CoreId)
	 */
	printf("cpu%d: CPUID_Fn%08x_EBX ThreadsPerCore %u CoreId %u\n",
	    ci->ci_cpuid, 0x8000001e,
	    ((ebx >> 8) & 0xff) + 1, (ebx >> 0) & 0xff);
	/*
	 * ECX [Node Identifiers] (Core::X86::Cpuid::NodeId)
	 */
	printf("cpu%d: CPUID_Fn%08x_ECX NodesPerProcessor %u NodeId %u\n",
	    ci->ci_cpuid, 0x8000001e,
	    ((ecx >> 8) & 0x7) + 1, (ecx >> 0) & 0xff);
#endif

	/*
	 * The low apicidsize bits of the local APIC id number the logical
	 * CPUs inside a package; whatever is above them is the package id.
	 */
	ci->ci_pkg_id = (lapicid >> apicidsize);

	/* Split the in-package id into core and thread (nthreads >= 1). */
	coreid = lapicid & ((1U << apicidsize) - 1);
	ci->ci_smt_id = coreid % nthreads;
	ci->ci_core_id = coreid / nthreads;
}
#undef DEBUG
#endif /* ifndef SMALL_KERNEL */

/*
 * Build up cpu topology for given cpu, must run on the core itself.
 *
 * Fills in ci_smt_id, ci_core_id and ci_pkg_id.  Reads cpuid_level,
 * cpu_vendor, ci_family, ci_pnfeatset and ci_cpuid, which the caller must
 * have set up.  When no mapping can be derived (unknown vendor, missing
 * CPUID leaves, or SMALL_KERNEL defined) the fallback is smt 0, package 0
 * and ci_core_id = ci_cpuid.
 */
void
cpu_topology(struct cpu_info *ci)
{
#ifndef SMALL_KERNEL
	uint32_t eax, ebx, ecx, edx;
	uint32_t apicid, max_apicid = 0, max_coreid = 0;
	uint32_t smt_bits = 0, core_bits = 0, pkg_bits = 0;
	uint32_t smt_mask = 0, core_mask = 0, pkg_mask = 0;

	/* We need at least apicid at CPUID 1 */
	if (cpuid_level < 1)
		goto no_topology;

	/* Initial apicid */
	CPUID(1, eax, ebx, ecx, edx);
	apicid = (ebx >> 24) & 0xff;

	if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
		/* We need at least apicid at CPUID 0x80000008 */
		if (ci->ci_pnfeatset < 0x80000008)
			goto no_topology;

		/*
		 * cpu_topology_amd_zen() reads leaf 0x8000001e, so only take
		 * that path when the leaf is reported as available.
		 */
		if (ci->ci_family == 0x17 && ci->ci_pnfeatset >= 0x8000001e)
			cpu_topology_amd_zen(ci);
		else if (ci->ci_pnfeatset >= 0x8000001e) {
			struct cpu_info *ci_other;
			CPU_INFO_ITERATOR cii;

			CPUID(0x8000001e, eax, ebx, ecx, edx);
			ci->ci_core_id = ebx & 0xff;
			ci->ci_pkg_id = ecx & 0xff;
			ci->ci_smt_id = 0;
			/*
			 * The smt id is the number of other list entries
			 * that share this core and package id.
			 */
			CPU_INFO_FOREACH(cii, ci_other) {
				if (ci != ci_other &&
				    ci_other->ci_core_id == ci->ci_core_id &&
				    ci_other->ci_pkg_id == ci->ci_pkg_id)
					ci->ci_smt_id++;
			}
		} else {
			CPUID(0x80000008, eax, ebx, ecx, edx);
			core_bits = (ecx >> 12) & 0xf;
			if (core_bits == 0)
				goto no_topology;
			/* So coreidsize 2 gives 3, 3 gives 7... */
			core_mask = (1U << core_bits) - 1;
			/* Core id is the least significant considering mask */
			ci->ci_core_id = apicid & core_mask;
			/* Pkg id is the upper remaining bits */
			ci->ci_pkg_id = apicid & ~core_mask;
			ci->ci_pkg_id >>= core_bits;
		}
	} else if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
		/* We only support leaf 1/4 detection */
		if (cpuid_level < 4)
			goto no_topology;
		/* Get max_apicid */
		CPUID(1, eax, ebx, ecx, edx);
		max_apicid = (ebx >> 16) & 0xff;
		/* Get max_coreid (always >= 1 because of the + 1) */
		CPUID_LEAF(4, 0, eax, ebx, ecx, edx);
		max_coreid = ((eax >> 26) & 0x3f) + 1;
		/* SMT */
		smt_bits = mask_width(max_apicid / max_coreid);
		smt_mask = (1U << smt_bits) - 1;
		/* Core */
		core_bits = ilog2(max_coreid);
		core_mask = (1U << (core_bits + smt_bits)) - 1;
		core_mask ^= smt_mask;
		/*
		 * Pkg: everything above the smt and core fields.  Built from
		 * an unsigned constant because left-shifting -1 is undefined
		 * behavior in C.
		 */
		pkg_bits = core_bits + smt_bits;
		pkg_mask = ~0U << pkg_bits;

		ci->ci_smt_id = apicid & smt_mask;
		ci->ci_core_id = (apicid & core_mask) >> smt_bits;
		ci->ci_pkg_id = (apicid & pkg_mask) >> pkg_bits;
	} else
		goto no_topology;
#ifdef DEBUG
	printf("cpu%d: smt %u, core %u, pkg %u "
	    "(apicid 0x%x, max_apicid 0x%x, max_coreid 0x%x, smt_bits 0x%x, smt_mask 0x%x, "
	    "core_bits 0x%x, core_mask 0x%x, pkg_bits 0x%x, pkg_mask 0x%x)\n ",
	    ci->ci_cpuid, ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id,
	    apicid, max_apicid, max_coreid, smt_bits, smt_mask, core_bits,
	    core_mask, pkg_bits, pkg_mask);
#else
	printf("cpu%d: smt %u, core %u, package %u\n", ci->ci_cpuid,
	    ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id);
#endif
	return;
	/* We can't map, so consider ci_core_id as ci_cpuid */
no_topology:
#endif
	ci->ci_smt_id = 0;
	ci->ci_core_id = ci->ci_cpuid;
	ci->ci_pkg_id = 0;
}
#endif

/*
 * Run CPUID function fn / sub-leaf leaf, store the registers in *gprs and
 * print them both as four hex words and as a hexdump of the structure.
 */
void
cpuid_dump(struct x86_gprs *gprs, uint32_t fn, uint32_t leaf)
{
	cpuid(gprs, fn, leaf);

	printf("cpuid Fn%08x => %08x %08x %08x %08x\n", fn,
	    gprs->eax, gprs->ebx, gprs->ecx, gprs->edx);
	hexdump(gprs, sizeof(*gprs));
}

/*
 * Dump CPUID leaves 0, 1, 0x80000000, 0x80000008 and 0x8000001e, fill in
 * cpu_info_primary from them and print the topology derived for this CPU.
 */
int
main(int argc, char *argv[])
{
	struct x86_gprs gprs;
	struct cpu_info *ci = &cpu_info_primary;

	cpuid_dump(&gprs, 0, 0);
	cpuid_level = gprs.eax;
	/*
	 * The vendor string is stored in EBX, EDX, ECX order.  cpu_vendor is
	 * a zero-initialized global of 16 bytes, so the 12 bytes copied here
	 * stay NUL-terminated.
	 */
	memcpy(cpu_vendor + 0, &gprs.ebx, sizeof(gprs.ebx));
	memcpy(cpu_vendor + 4, &gprs.edx, sizeof(gprs.edx));
	memcpy(cpu_vendor + 8, &gprs.ecx, sizeof(gprs.ecx));

	printf("cpuid 0 -> level 0x%0x vendor %s\n", cpuid_level, cpu_vendor);

	cpuid_dump(&gprs, 1, 0);
	ci->ci_signature = gprs.eax;
	ci->ci_feature_flags = gprs.edx;

	cpuid_dump(&gprs, 0x80000000, 0);
	ci->ci_pnfeatset = gprs.eax;

	/*
	 * Base family is signature bits 11:8, base model bits 7:4.  For base
	 * family 0x6 or 0xf the extended family (bits 27:20) is added and the
	 * extended model (bits 19:16) becomes the high nibble of the model.
	 */
	ci->ci_family = (ci->ci_signature >> 8) & 0x0f;
	ci->ci_model = (ci->ci_signature >> 4) & 0x0f;
	if (ci->ci_family == 0x6 || ci->ci_family == 0xf) {
		ci->ci_family += (ci->ci_signature >> 20) & 0xff;
		ci->ci_model += ((ci->ci_signature >> 16) & 0x0f) << 4;
	}

	printf("family %xh model %xh\n", ci->ci_family, ci->ci_model);

	cpuid_dump(&gprs, 0x80000008, 0);
	printf("nc %u\n", (gprs.ecx & 0xff) + 1);

	cpuid_dump(&gprs, 0x8000001e, 0);

	cpu_topology(ci);

	return (0);
}