? arch/amd64/stand/biosboot/assym.h ? arch/amd64/stand/boot/assym.h ? arch/amd64/stand/cdboot/assym.h ? arch/amd64/stand/fdboot/assym.h ? arch/amd64/stand/mbr/assym.h ? arch/amd64/stand/pxeboot/assym.h ? arch/i386/compile/GENERIC ? net/gre ? net/if_ngre.c ? sys/lockdebug.h Index: arch/amd64/amd64/cpu.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v retrieving revision 1.102 diff -u -p -r1.102 cpu.c --- arch/amd64/amd64/cpu.c 28 Jul 2016 21:57:57 -0000 1.102 +++ arch/amd64/amd64/cpu.c 6 Mar 2017 00:44:02 -0000 @@ -426,6 +426,7 @@ cpu_attach(struct device *parent, struct /* * Enable local apic */ + lapic_attach(ci); lapic_enable(); lapic_calibrate_timer(ci); #endif @@ -462,6 +463,9 @@ cpu_attach(struct device *parent, struct cpu_vm_init(ci); #if defined(MULTIPROCESSOR) + mtx_init(&ci->ci_xcall_mtx, IPL_HIGH); + TAILQ_INIT(&ci->ci_xcall_list); + if (mp_verbose) { printf("%s: kstack at 0x%lx for %d bytes\n", sc->sc_dev.dv_xname, kstack, USPACE); @@ -683,6 +687,7 @@ cpu_hatch(void *v) ci->ci_flags |= CPUF_PRESENT; + lapic_attach(ci); lapic_enable(); lapic_startclock(); Index: arch/amd64/amd64/intr.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/intr.c,v retrieving revision 1.48 diff -u -p -r1.48 intr.c --- arch/amd64/amd64/intr.c 22 Jun 2016 01:12:38 -0000 1.48 +++ arch/amd64/amd64/intr.c 6 Mar 2017 00:44:02 -0000 @@ -550,7 +550,10 @@ struct intrhand fake_softclock_intrhand; struct intrhand fake_softnet_intrhand; struct intrhand fake_softtty_intrhand; struct intrhand fake_timer_intrhand; +#ifdef MULTIPROCESSOR struct intrhand fake_ipi_intrhand; +struct intrhand fake_xcall_intrhand; +#endif #if NXEN > 0 struct intrhand fake_xen_intrhand; #endif @@ -621,6 +624,15 @@ cpu_intr_init(struct cpu_info *ci) isp->is_handlers = &fake_ipi_intrhand; isp->is_pic = &local_pic; ci->ci_isources[LIR_IPI] = isp; + isp = malloc(sizeof (struct intrsource), M_DEVBUF, M_NOWAIT|M_ZERO); + if (isp == NULL) + panic("can't allocate fixed interrupt source"); + isp->is_recurse = Xxcallintr; + isp->is_resume = Xxcallintr; + fake_xcall_intrhand.ih_level = IPL_SOFTCLOCK; + isp->is_handlers = &fake_xcall_intrhand; + isp->is_pic = &local_pic; + ci->ci_isources[SIR_XCALL] = isp; #endif #if NXEN > 0 isp = malloc(sizeof (struct intrsource), M_DEVBUF, M_NOWAIT|M_ZERO); @@ -741,6 +753,5 @@ softintr(int sir) { struct cpu_info *ci = curcpu(); - __asm volatile("lock; orq %1, %0" : - "=m"(ci->ci_ipending) : "ir" (1UL << sir)); + x86_atomic_setbits_u64(&ci->ci_ipending, 1UL << sir); } Index: arch/amd64/amd64/ipifuncs.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/ipifuncs.c,v retrieving revision 1.28 diff -u -p -r1.28 ipifuncs.c --- arch/amd64/amd64/ipifuncs.c 23 Nov 2015 22:57:12 -0000 1.28 +++ arch/amd64/amd64/ipifuncs.c 6 Mar 2017 00:44:02 -0000 @@ -65,6 +65,8 @@ void x86_64_ipi_halt(struct cpu_info *); void x86_64_ipi_synch_fpu(struct cpu_info *); void x86_64_ipi_flush_fpu(struct cpu_info *); +void x86_64_ipi_xcall(struct cpu_info *); + #if NVMM > 0 void x86_64_ipi_start_vmm(struct cpu_info *); void x86_64_ipi_stop_vmm(struct cpu_info *); @@ -102,6 +104,7 @@ void (*ipifunc[X86_NIPI])(struct cpu_inf NULL, NULL, #endif + x86_64_ipi_xcall, }; void @@ -163,3 +166,13 @@ x86_64_ipi_stop_vmm(struct cpu_info *ci) stop_vmm_on_cpu(ci); } #endif /* NVMM > 0 */ + +void +x86_64_ipi_xcall(struct cpu_info *ci) +{ + /* + * this is an inlining of softintr() 
because we already have + * curcpu() and the SIR_XCALL bit to set. + */ + x86_atomic_setbits_u64(&ci->ci_ipending, 1UL << SIR_XCALL); +}; Index: arch/amd64/amd64/lapic.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/lapic.c,v retrieving revision 1.45 diff -u -p -r1.45 lapic.c --- arch/amd64/amd64/lapic.c 1 Nov 2016 01:13:19 -0000 1.45 +++ arch/amd64/amd64/lapic.c 6 Mar 2017 00:44:02 -0000 @@ -35,6 +35,9 @@ #include #include #include +#include +#include +#include #include @@ -62,13 +65,43 @@ #include #endif -struct evcount clk_count; #ifdef MULTIPROCESSOR struct evcount ipi_count; #endif +struct lapic { + char lapic_clk_name[16]; + struct evcount lapic_clk_count; + + struct task_heap lapic_tasks; + int lapic_now; + int lapic_state; +#define LAPIC_S_IDLE 0 +#define LAPIC_S_RUNNING 1 + + struct task lapic_hardclock; + struct task lapic_statclock; + struct intrframe *lapic_frame; + + struct task lapic_tmo_wait; /* waits in lapic_tasks */ + struct task lapic_tmo_fire; /* runs in ci_xcall_list */ + struct mutex lapic_tmo_mtx; /* protects tmo_tasks */ + struct task_heap lapic_tmo_tasks; +}; + +int lapic_add(struct lapic *, struct task *, int); +void lapic_run(struct lapic *, int); + +void lapic_hardclock(void *); +void lapic_statclock(void *); + +void lapic_tmo_wait(void *); +void lapic_tmo_fire(void *); + +int lapic_tmo_add_usec(struct cpu_info *, struct task *, int); +int lapic_tmo_del(struct cpu_info *, struct task *); + void lapic_delay(int); -static u_int32_t lapic_gettick(void); void lapic_clockintr(void *, struct intrframe); void lapic_initclocks(void); void lapic_map(paddr_t); @@ -223,10 +256,37 @@ lapic_map(paddr_t lapic_base) enable_intr(); } +void +lapic_attach(struct cpu_info *ci) +{ + static u_int64_t clk_irq = 0; + struct lapic *l; + + l = malloc(sizeof(*l), M_DEVBUF, M_WAITOK); + + snprintf(l->lapic_clk_name, sizeof(l->lapic_clk_name), + "cpu%uclk", CPU_INFO_UNIT(ci)); + evcount_attach(&l->lapic_clk_count, l->lapic_clk_name, &clk_irq); + + HEAP_INIT(task_heap, &l->lapic_tasks); + l->lapic_now = 0; + l->lapic_state = LAPIC_S_IDLE; + + task_set(&l->lapic_hardclock, lapic_hardclock, l); + task_set(&l->lapic_statclock, lapic_statclock, l); + + task_set(&l->lapic_tmo_wait, lapic_tmo_wait, ci); + task_set(&l->lapic_tmo_fire, lapic_tmo_fire, l); + mtx_init(&l->lapic_tmo_mtx, IPL_HIGH); + HEAP_INIT(task_heap, &l->lapic_tmo_tasks); + + ci->ci_lapic = l; +} /* * enable local apic */ + void lapic_enable(void) { @@ -330,7 +390,6 @@ lapic_set_lvt(void) void lapic_boot_init(paddr_t lapic_base) { - static u_int64_t clk_irq = 0; #ifdef MULTIPROCESSOR static u_int64_t ipi_irq = 0; #endif @@ -364,13 +423,12 @@ lapic_boot_init(paddr_t lapic_base) idt_vec_set(LAPIC_HYPERV_VECTOR, Xintr_hyperv_upcall); #endif - evcount_attach(&clk_count, "clock", &clk_irq); #ifdef MULTIPROCESSOR evcount_attach(&ipi_count, "ipi", &ipi_irq); #endif } -static __inline u_int32_t +static inline u_int32_t lapic_gettick(void) { return lapic_readreg(LAPIC_CCR_TIMER); @@ -378,8 +436,6 @@ lapic_gettick(void) #include /* for hz */ -u_int32_t lapic_tval; - /* * this gets us up to a 4GHz busclock.... 
*/ @@ -388,34 +444,258 @@ u_int32_t lapic_frac_usec_per_cycle; u_int64_t lapic_frac_cycle_per_usec; u_int32_t lapic_delaytab[26]; +unsigned int lapic_misses; + void lapic_clockintr(void *arg, struct intrframe frame) { struct cpu_info *ci = curcpu(); + struct lapic *l = ci->ci_lapic; + struct task *t, key; int floor; + int now; + + l->lapic_state = LAPIC_S_IDLE; + l->lapic_frame = &frame; + now = l->lapic_now + lapic_readreg(LAPIC_ICR_TIMER); floor = ci->ci_handled_intr_level; ci->ci_handled_intr_level = ci->ci_ilevel; - hardclock((struct clockframe *)&frame); + + key.t_deadline = now; + + while ((t = HEAP_CEXTRACT(task_heap, &l->lapic_tasks, &key)) != NULL) { + CLR(t->t_flags, TASK_ONQUEUE); + (*t->t_func)(t->t_arg); + } + ci->ci_handled_intr_level = floor; - clk_count.ec_count++; + if (l->lapic_state != LAPIC_S_RUNNING) + lapic_run(l, now); + + ci->ci_lapic->lapic_clk_count.ec_count++; +} + +static inline int64_t +lapic_usec(int usec) +{ + if (usec <= 25) + return (lapic_delaytab[usec]); + + return ((lapic_frac_cycle_per_usec * usec) >> 32); +} + +void +lapic_run(struct lapic *l, int now) +{ + struct task *t; + int diff; + + t = HEAP_FIRST(task_heap, &l->lapic_tasks); + diff = t->t_deadline - now; + if (diff < 1) + diff = 1; + + l->lapic_now = now; + l->lapic_state = LAPIC_S_RUNNING; + lapic_writereg(LAPIC_LVTT, LAPIC_TIMER_VECTOR); + lapic_writereg(LAPIC_ICR_TIMER, diff); +} + +int +lapic_add(struct lapic *l, struct task *t, int usec) +{ + int rv = 1; + int now; + + lapic_writereg(LAPIC_LVTT, LAPIC_LVTT_M | LAPIC_TIMER_VECTOR); + now = l->lapic_now; + now += lapic_readreg(LAPIC_ICR_TIMER) - lapic_readreg(LAPIC_CCR_TIMER); + lapic_writereg(LAPIC_ICR_TIMER, 0); + + if (ISSET(t->t_flags, TASK_ONQUEUE)) { + HEAP_REMOVE(task_heap, &l->lapic_tasks, t); + rv = 0; + } else + SET(t->t_flags, TASK_ONQUEUE); + + t->t_deadline = now + lapic_usec(usec); + HEAP_INSERT(task_heap, &l->lapic_tasks, t); + + lapic_run(l, now); + + return (rv); +} + +void +lapic_hardclock(void *arg) +{ + struct lapic *l = arg; + + hardclock(l->lapic_frame); + + lapic_add(l, &l->lapic_hardclock, 1000000 / hz); +} + +void +lapic_statclock(void *arg) +{ + struct lapic *l = arg; + + statclock(l->lapic_frame); + + lapic_add(l, &l->lapic_statclock, 1000000 / stathz); +} + +void +lapic_tmo_wait(void *arg) +{ + struct cpu_info *ci = arg; + struct lapic *l = ci->ci_lapic; + struct task *t = &l->lapic_tmo_fire; + + if (ISSET(t->t_flags, TASK_ONQUEUE)) + return; + + mtx_enter(&ci->ci_xcall_mtx); + if (!ISSET(t->t_flags, TASK_ONQUEUE)) { + TAILQ_INSERT_TAIL(&ci->ci_xcall_list, t, t_entry); + SET(t->t_flags, TASK_ONQUEUE); + + x86_atomic_setbits_u64(&ci->ci_ipending, 1UL << SIR_XCALL); + } + mtx_leave(&ci->ci_xcall_mtx); +} + +void +lapic_tmo_fire(void *arg) +{ + struct lapic *l = arg; + struct task *t; + struct task work; + + mtx_enter(&l->lapic_tmo_mtx); + while ((t = HEAP_CEXTRACT(task_heap, &l->lapic_tmo_tasks, + &l->lapic_tmo_wait)) != NULL) { + CLR(t->t_flags, TASK_ONQUEUE); + work = *t; + mtx_leave(&l->lapic_tmo_mtx); + + (*work.t_func)(work.t_arg); + + if (HEAP_EMPTY(task_heap, &l->lapic_tmo_tasks)) { + /* short circuit */ + break; + } + + mtx_enter(&l->lapic_tmo_mtx); + } + mtx_leave(&l->lapic_tmo_mtx); +} + +struct lapic_tmo_proxy { + struct task *t; + int usecs; + int rv; + volatile int spin; +}; + +void +_lapic_tmo_add(void *arg) +{ + struct lapic_tmo_proxy *proxy = arg; + struct task *t = proxy->t; + struct lapic *l = curcpu()->ci_lapic; + struct task *wait = &l->lapic_tmo_wait; + int now; + + mtx_enter(&l->lapic_tmo_mtx); 
+ lapic_writereg(LAPIC_LVTT, LAPIC_LVTT_M | LAPIC_TIMER_VECTOR); + now = l->lapic_now; + now += lapic_readreg(LAPIC_ICR_TIMER) - lapic_readreg(LAPIC_CCR_TIMER); + lapic_writereg(LAPIC_ICR_TIMER, 0); + + if (ISSET(t->t_flags, TASK_ONQUEUE)) { + HEAP_REMOVE(task_heap, &l->lapic_tmo_tasks, t); + proxy->rv = 0; + } else { + SET(t->t_flags, TASK_ONQUEUE); + proxy->rv = 1; + } + + t->t_deadline = now + lapic_usec(proxy->usecs); + HEAP_INSERT(task_heap, &l->lapic_tmo_tasks, t); + + /* get the deadline for the hardclock wait task */ + t = HEAP_FIRST(task_heap, &l->lapic_tmo_tasks); + + if (ISSET(wait->t_flags, TASK_ONQUEUE)) + HEAP_REMOVE(task_heap, &l->lapic_tasks, wait); + else + SET(wait->t_flags, TASK_ONQUEUE); + + wait->t_deadline = t->t_deadline; + HEAP_INSERT(task_heap, &l->lapic_tasks, wait); + + lapic_run(l, now); + mtx_leave(&l->lapic_tmo_mtx); + + proxy->spin = 0; /* release the other cpu */ +} + +int +lapic_tmo_add(struct cpu_info *ci, struct task *t, int usecs) +{ + struct lapic_tmo_proxy proxy = { .t = t, .usecs = usecs, .spin = 1 }; + struct task xc = TASK_INITIALIZER(_lapic_tmo_add, &proxy); + + cpu_xcall(ci, &xc); + + while (proxy.spin) + __asm volatile("pause": : :"memory"); + + return (proxy.rv); +} + +int +lapic_tmo_del(struct cpu_info *ci, struct task *t) +{ + struct lapic *l = ci->ci_lapic; + int rv = 0; + + /* + * reach over to the target cpu and remove the task directly. this + * doesnt remove or reschedule the hardclock wait task, it just lets + * it happen and have no effect. + */ + + if (!ISSET(t->t_flags, TASK_ONQUEUE)) + return (0); + + mtx_enter(&l->lapic_tmo_mtx); + if (ISSET(t->t_flags, TASK_ONQUEUE)) { + HEAP_REMOVE(task_heap, &l->lapic_tmo_tasks, t); + CLR(t->t_flags, TASK_ONQUEUE); + rv = 1; + } + mtx_leave(&l->lapic_tmo_mtx); + + return (rv); } void lapic_startclock(void) { + struct cpu_info *ci = curcpu(); + struct lapic *l = ci->ci_lapic; + /* - * Start local apic countdown timer running, in repeated mode. - * - * Mask the clock interrupt and set mode, - * then set divisor, - * then unmask and set the vector. + * Start local apic countdown timer running; */ - lapic_writereg(LAPIC_LVTT, LAPIC_LVTT_TM|LAPIC_LVTT_M); lapic_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1); - lapic_writereg(LAPIC_ICR_TIMER, lapic_tval); - lapic_writereg(LAPIC_LVTT, LAPIC_LVTT_TM|LAPIC_TIMER_VECTOR); + lapic_add(l, &l->lapic_hardclock, 1000000 / hz); + lapic_add(l, &l->lapic_statclock, 1000000 / stathz); } void @@ -503,17 +783,7 @@ lapic_calibrate_timer(struct cpu_info *c ci->ci_dev->dv_xname, tmp / (1000 * 1000)); if (lapic_per_second != 0) { - /* - * reprogram the apic timer to run in periodic mode. - * XXX need to program timer on other cpu's, too. - */ - lapic_tval = (lapic_per_second * 2) / hz; - lapic_tval = (lapic_tval / 2) + (lapic_tval & 0x1); - - lapic_writereg(LAPIC_LVTT, LAPIC_LVTT_TM | LAPIC_LVTT_M | - LAPIC_TIMER_VECTOR); - lapic_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1); - lapic_writereg(LAPIC_ICR_TIMER, lapic_tval); + stathz = 128; /* * Compute fixed-point ratios between cycles and @@ -535,11 +805,13 @@ lapic_calibrate_timer(struct cpu_info *c lapic_delaytab[i] = (lapic_frac_cycle_per_usec * i) >> 32; + lapic_startclock(); + /* * Now that the timer's calibrated, use the apic timer routines * for all our timing needs.. 
*/ - delay_func = lapic_delay; + // delay_func = lapic_delay; initclock_func = lapic_initclocks; } } @@ -551,28 +823,32 @@ lapic_calibrate_timer(struct cpu_info *c void lapic_delay(int usec) { - int32_t tick, otick; - int64_t deltat; /* XXX may want to be 64bit */ - - otick = lapic_gettick(); + struct lapic *l = curcpu()->ci_lapic; + int tick, otick; + int now, onow; + int64_t deltat; if (usec <= 0) return; - if (usec <= 25) - deltat = lapic_delaytab[usec]; - else - deltat = (lapic_frac_cycle_per_usec * usec) >> 32; - while (deltat > 0) { + deltat = lapic_usec(usec); + + onow = l->lapic_now; + otick = lapic_gettick(); + + do { + now = l->lapic_now; tick = lapic_gettick(); - if (tick > otick) - deltat -= lapic_tval - (tick - otick); - else + + if (now != onow) { + /* this is a new epoch, skip calculating a diff */ + onow = now; + } else deltat -= otick - tick; otick = tick; - - x86_pause(); - } + + __asm volatile("pause": : :"memory"); + } while (deltat > 0); } /* Index: arch/amd64/amd64/lock_machdep.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/lock_machdep.c,v retrieving revision 1.10 diff -u -p -r1.10 lock_machdep.c --- arch/amd64/amd64/lock_machdep.c 19 Mar 2016 11:34:22 -0000 1.10 +++ arch/amd64/amd64/lock_machdep.c 6 Mar 2017 00:44:02 -0000 @@ -19,6 +19,8 @@ #include #include +#include +#include #include #include @@ -30,36 +32,41 @@ void __mp_lock_init(struct __mp_lock *mpl) { memset(mpl->mpl_cpus, 0, sizeof(mpl->mpl_cpus)); + mpl->mpl_owner = NULL; mpl->mpl_users = 0; mpl->mpl_ticket = 0; } -#if defined(MP_LOCKDEBUG) -#ifndef DDB -#error "MP_LOCKDEBUG requires DDB" -#endif - +#ifdef MP_LOCKDEBUG /* CPU-dependent timing, needs this to be settable from ddb. */ extern int __mp_lock_spinout; #endif -static __inline void +static inline void __mp_lock_spin(struct __mp_lock *mpl, u_int me) { -#ifndef MP_LOCKDEBUG - while (mpl->mpl_ticket != me) - SPINLOCK_SPIN_HOOK; -#else - int nticks = __mp_lock_spinout; +#ifdef MP_LOCKDEBUG + struct cpu_info *owner = NULL; + unsigned int spins = __mp_lock_spinout; +#endif - while (mpl->mpl_ticket != me && --nticks > 0) - SPINLOCK_SPIN_HOOK; + while (mpl->mpl_ticket != me) { +#ifdef MP_LOCKDEBUG + struct cpu_info *them = mpl->mpl_owner; + if (owner != them) { + owner = them; + spins = __mp_lock_spinout; + } else if (--spins == 0) { + /* check for deadlock */ + lock_check(owner); + spins = __mp_lock_spinout; + } +#endif - if (nticks == 0) { - db_printf("__mp_lock(%p): lock spun out", mpl); - Debugger(); + SPINLOCK_SPIN_HOOK; } -#endif + + mpl->mpl_owner = curcpu(); } static inline u_int @@ -80,8 +87,10 @@ __mp_lock(struct __mp_lock *mpl) long rf = read_rflags(); disable_intr(); - if (cpu->mplc_depth++ == 0) + if (cpu->mplc_depth++ == 0) { + lock_enter(mpl, LOCK_TYPE_MPLOCK); cpu->mplc_ticket = fetch_and_add(&mpl->mpl_users, 1); + } write_rflags(rf); __mp_lock_spin(mpl, cpu->mplc_ticket); @@ -101,8 +110,11 @@ __mp_unlock(struct __mp_lock *mpl) #endif disable_intr(); - if (--cpu->mplc_depth == 0) + if (--cpu->mplc_depth == 0) { + mpl->mpl_owner = NULL; mpl->mpl_ticket++; + lock_leave(mpl, LOCK_TYPE_MPLOCK); + } write_rflags(rf); } @@ -116,7 +128,9 @@ __mp_release_all(struct __mp_lock *mpl) disable_intr(); rv = cpu->mplc_depth; cpu->mplc_depth = 0; + mpl->mpl_owner = NULL; mpl->mpl_ticket++; + lock_leave(mpl, LOCK_TYPE_MPLOCK); write_rflags(rf); return (rv); Index: arch/amd64/amd64/mutex.S =================================================================== RCS file: arch/amd64/amd64/mutex.S diff 
-N arch/amd64/amd64/mutex.S --- arch/amd64/amd64/mutex.S 2 Jun 2013 01:55:52 -0000 1.9 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,157 +0,0 @@ -/* $OpenBSD: mutex.S,v 1.9 2013/06/02 01:55:52 kettenis Exp $ */ - -/* - * Copyright (c) 2004 Artur Grabowski - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "assym.h" - -#include -#include -#include -#include -#include -#include - -/* - * Yeah, we don't really need to implement mtx_init here, but let's keep - * all the functions in the same place. - */ -ENTRY(__mtx_init) - movl %esi, MTX_WANTIPL(%rdi) - movl $0, MTX_OLDIPL(%rdi) - movq $0, MTX_OWNER(%rdi) - ret - -ENTRY(mtx_enter) -1: movl MTX_WANTIPL(%rdi), %eax - movq CPUVAR(SELF), %rcx - movl CPU_INFO_ILEVEL(%rcx), %edx # oipl = cpl; - cmpl %eax, %edx # if (cpl < mtx->mtx_wantipl) - cmovge %edx, %eax - movl %eax, CPU_INFO_ILEVEL(%rcx) # cpl = mtx->mtx_wantipl; - /* - * %edx - the old ipl - * %rcx - curcpu() - */ - xorq %rax, %rax -#ifdef MULTIPROCESSOR - lock -#endif - cmpxchgq %rcx, MTX_OWNER(%rdi) # test_and_set(mtx->mtx_owner) - jne 2f - movl %edx, MTX_OLDIPL(%rdi) -#ifdef DIAGNOSTIC - incl CPU_INFO_MUTEX_LEVEL(%rcx) -#endif - ret - - /* We failed to obtain the lock. splx, spin and retry. */ -2: pushq %rdi - movl %edx, %edi - call _C_LABEL(spllower) - popq %rdi -#ifdef DIAGNOSTIC - movq CPUVAR(SELF), %rcx - cmpq MTX_OWNER(%rdi), %rcx - je 4f -#endif -3: - movq MTX_OWNER(%rdi), %rax - testq %rax, %rax - jz 1b - jmp 3b -#ifdef DIAGNOSTIC -4: movq $5f, %rdi - call _C_LABEL(panic) -5: .asciz "mtx_enter: locking against myself" -#endif - -ENTRY(mtx_enter_try) -1: movl MTX_WANTIPL(%rdi), %eax - movq CPUVAR(SELF), %rcx - movl CPU_INFO_ILEVEL(%rcx), %edx # oipl = cpl; - cmpl %eax, %edx # if (cpl < mtx->mtx_wantipl) - cmovge %edx, %eax - movl %eax, CPU_INFO_ILEVEL(%rcx) # cpl = mtx->mtx_wantipl; - /* - * %edx - the old ipl - * %rcx - curcpu() - */ - xorq %rax, %rax -#ifdef MULTIPROCESSOR - lock -#endif - cmpxchgq %rcx, MTX_OWNER(%rdi) # test_and_set(mtx->mtx_owner) - jne 2f - movl %edx, MTX_OLDIPL(%rdi) -#ifdef DIAGNOSTIC - incl CPU_INFO_MUTEX_LEVEL(%rcx) -#endif - movq $1, %rax - ret - - /* We failed to obtain the lock. splx and return 0. 
*/ -2: pushq %rdi - movl %edx, %edi - call _C_LABEL(spllower) - popq %rdi -#ifdef DIAGNOSTIC - movq CPUVAR(SELF), %rcx - cmpq MTX_OWNER(%rdi), %rcx - je 3f -#endif - xorq %rax, %rax - ret - -#ifdef DIAGNOSTIC -3: movq $4f, %rdi - call _C_LABEL(panic) -4: .asciz "mtx_enter_try: locking against myself" -#endif - - -ENTRY(mtx_leave) - movq %rdi, %rax -#ifdef DIAGNOSTIC - movq CPUVAR(SELF), %rcx - cmpq MTX_OWNER(%rax), %rcx - jne 2f - decl CPU_INFO_MUTEX_LEVEL(%rcx) -#endif - xorq %rcx, %rcx - movl MTX_OLDIPL(%rax), %edi - movl %ecx, MTX_OLDIPL(%rax) - movq %rcx, MTX_OWNER(%rax) - cmpl %edi, CPUVAR(ILEVEL) - je 1f - call _C_LABEL(spllower) -1: - ret - -#ifdef DIAGNOSTIC -2: movq $3f, %rdi - call _C_LABEL(panic) -3: .asciz "mtx_leave: lock not held" -#endif Index: arch/amd64/amd64/mutex.c =================================================================== RCS file: arch/amd64/amd64/mutex.c diff -N arch/amd64/amd64/mutex.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ arch/amd64/amd64/mutex.c 6 Mar 2017 00:44:02 -0000 @@ -0,0 +1,168 @@ +/* $OpenBSD: mutex.c,v 1.16 2016/06/13 01:26:14 dlg Exp $ */ + +/* + * Copyright (c) 2004 Artur Grabowski + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +void +__mtx_init(struct mutex *mtx, int wantipl) +{ + mtx->mtx_owner = NULL; + mtx->mtx_oldipl = IPL_NONE; + mtx->mtx_wantipl = wantipl; +} + +#ifdef MULTIPROCESSOR + +void +mtx_enter(struct mutex *mtx) +{ +#ifdef MP_LOCKDEBUG + extern int __mp_lock_spinout; + unsigned int spins = __mp_lock_spinout; + struct cpu_info *them; +#endif + struct cpu_info *owner; + struct cpu_info *ci = curcpu(); + int s = IPL_NONE; + + if (mtx->mtx_wantipl != IPL_NONE) + s = splraise(mtx->mtx_wantipl); + + lock_enter(mtx, LOCK_TYPE_MUTEX); + while ((owner = atomic_cas_ptr(&mtx->mtx_owner, NULL, ci)) != NULL) { + if (owner == ci) + panic("%p: locking against myself", mtx); +#ifdef MP_LOCKDEBUG + if (owner != them) { + them = owner; + spins = __mp_lock_spinout; + } else if (--spins == 0) { + /* check for deadlock */ + lock_check(them); + spins = __mp_lock_spinout; + } +#endif + + SPINLOCK_SPIN_HOOK; + } + +#ifdef DIAGNOSTIC + ci->ci_mutex_level++; +#endif + + __splbarrier(); + mtx->mtx_oldipl = s; +} + +int +mtx_enter_try(struct mutex *mtx) +{ + struct cpu_info *ci = curcpu(); + struct cpu_info *owner; + int s = IPL_NONE; + + if (mtx->mtx_wantipl != IPL_NONE) + s = splraise(mtx->mtx_wantipl); + + owner = atomic_cas_ptr(&mtx->mtx_owner, NULL, ci); + if (owner != NULL) { + KASSERTMSG(owner != ci, "%p: locking against myself", mtx); + + if (mtx->mtx_wantipl != IPL_NONE) + splx(s); + return (0); + } + + lock_enter(mtx, LOCK_TYPE_MUTEX); + + __splbarrier(); + mtx->mtx_oldipl = s; + +#ifdef DIAGNOSTIC + ci->ci_mutex_level++; +#endif + + return (1); +} + +#else /* MULTIPROCESSOR */ + +void +mtx_enter(struct mutex *mtx) +{ + struct cpu_info *ci = curcpu(); + + KASSERTMSG(mtx->mtx_owner != ci, "%p: locking against myself", mtx); + if (mtx->mtx_wantipl != IPL_NONE) + mtx->mtx_oldipl = splraise(mtx->mtx_wantipl); + + mtx->mtx_owner = ci; + +#ifdef DIAGNOSTIC + ci->ci_mutex_level++; +#endif +} + +int +mtx_enter_try(struct mutex *mtx) +{ + mtx_enter(mtx); + return (1); +} + +#endif + +void +mtx_leave(struct mutex *mtx) +{ + int s; + + MUTEX_ASSERT_LOCKED(mtx); + +#ifdef DIAGNOSTIC + curcpu()->ci_mutex_level--; +#endif + + s = mtx->mtx_oldipl; + __splbarrier(); + mtx->mtx_owner = NULL; +#ifdef MULTIPROCESSOR + lock_leave(mtx, LOCK_TYPE_MUTEX); +#endif + if (mtx->mtx_wantipl != IPL_NONE) + splx(s); +} Index: arch/amd64/amd64/softintr.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/softintr.c,v retrieving revision 1.9 diff -u -p -r1.9 softintr.c --- arch/amd64/amd64/softintr.c 28 Aug 2015 00:03:53 -0000 1.9 +++ arch/amd64/amd64/softintr.c 6 Mar 2017 00:44:02 -0000 @@ -168,3 +168,49 @@ softintr_disestablish(void *arg) free(sih, M_DEVBUF, sizeof(*sih)); } + +#ifdef MULTIPROCESSOR +#include + +void +cpu_xcall(struct cpu_info *ci, struct task *t) +{ + if (ci == curcpu()) { + /* execute the task immediately on the local cpu */ + int s = splsoftclock(); + (*t->t_func)(t->t_arg); + splx(s); + } else { + mtx_enter(&ci->ci_xcall_mtx); + SET(t->t_flags, TASK_ONQUEUE); + TAILQ_INSERT_TAIL(&ci->ci_xcall_list, t, t_entry); + mtx_leave(&ci->ci_xcall_mtx); + + x86_send_ipi(ci, X86_IPI_XCALL); + } +} + +void +cpu_xcall_dispatch(void) +{ + struct cpu_info *ci = curcpu(); + struct task *t; + struct task work; + + while (!TAILQ_EMPTY(&ci->ci_xcall_list)) { + mtx_enter(&ci->ci_xcall_mtx); + t = TAILQ_FIRST(&ci->ci_xcall_list); + if (t == NULL) { + mtx_enter(&ci->ci_xcall_mtx); + break; 
+ } + + TAILQ_REMOVE(&ci->ci_xcall_list, t, t_entry); + CLR(t->t_flags, TASK_ONQUEUE); + work = *t; + mtx_leave(&ci->ci_xcall_mtx); + + (*work.t_func)(work.t_arg); + } +} +#endif Index: arch/amd64/amd64/vector.S =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/vector.S,v retrieving revision 1.47 diff -u -p -r1.47 vector.S --- arch/amd64/amd64/vector.S 4 Sep 2016 09:22:28 -0000 1.47 +++ arch/amd64/amd64/vector.S 6 Mar 2017 00:44:02 -0000 @@ -1108,3 +1108,11 @@ IDTVEC(softclock) call _C_LABEL(softintr_dispatch) decl CPUVAR(IDEPTH) jmp *%r13 + +IDTVEC(xcallintr) + movl $IPL_SOFTCLOCK, CPUVAR(ILEVEL) + sti + incl CPUVAR(IDEPTH) + call _C_LABEL(cpu_xcall_dispatch) + decl CPUVAR(IDEPTH) + jmp *%r13 Index: arch/amd64/conf/files.amd64 =================================================================== RCS file: /cvs/src/sys/arch/amd64/conf/files.amd64,v retrieving revision 1.87 diff -u -p -r1.87 files.amd64 --- arch/amd64/conf/files.amd64 21 Jan 2017 10:58:15 -0000 1.87 +++ arch/amd64/conf/files.amd64 6 Mar 2017 00:44:02 -0000 @@ -26,7 +26,7 @@ file arch/amd64/amd64/fpu.c file arch/amd64/amd64/softintr.c file arch/amd64/amd64/i8259.c file arch/amd64/amd64/cacheinfo.c -file arch/amd64/amd64/mutex.S +file arch/amd64/amd64/mutex.c file arch/amd64/amd64/vector.S file arch/amd64/amd64/copy.S file arch/amd64/amd64/spl.S Index: arch/amd64/include/cpu.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v retrieving revision 1.108 diff -u -p -r1.108 cpu.h --- arch/amd64/include/cpu.h 2 Mar 2017 10:38:10 -0000 1.108 +++ arch/amd64/include/cpu.h 6 Mar 2017 00:44:02 -0000 @@ -52,6 +52,9 @@ #include #include +/* for xcalls */ +#include +#include #ifdef _KERNEL /* VMXON region (Intel) */ @@ -84,6 +87,8 @@ union vmm_cpu_cap { struct svm vcc_svm; }; +struct lapic; + struct x86_64_tss; struct cpu_info { struct device *ci_dev; @@ -117,6 +122,7 @@ struct cpu_info { int ci_mutex_level; #endif + struct lapic *ci_lapic; volatile u_int ci_flags; u_int32_t ci_ipis; @@ -171,6 +177,8 @@ struct cpu_info { #ifdef MULTIPROCESSOR struct srp_hazard ci_srp_hazards[SRP_HAZARD_NUM]; + struct mutex ci_xcall_mtx; + struct task_list ci_xcall_list; #endif struct ksensordev ci_sensordev; Index: arch/amd64/include/i82489var.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/i82489var.h,v retrieving revision 1.17 diff -u -p -r1.17 i82489var.h --- arch/amd64/include/i82489var.h 22 Jun 2016 01:12:38 -0000 1.17 +++ arch/amd64/include/i82489var.h 6 Mar 2017 00:44:02 -0000 @@ -119,6 +119,7 @@ struct cpu_info; extern void lapic_boot_init(paddr_t); extern void lapic_set_lvt(void); +extern void lapic_attach(struct cpu_info *); extern void lapic_enable(void); extern void lapic_disable(void); extern void lapic_calibrate_timer(struct cpu_info *ci); Index: arch/amd64/include/intr.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/intr.h,v retrieving revision 1.29 diff -u -p -r1.29 intr.h --- arch/amd64/include/intr.h 13 Sep 2015 11:48:17 -0000 1.29 +++ arch/amd64/include/intr.h 6 Mar 2017 00:44:02 -0000 @@ -219,6 +219,8 @@ void x86_ipi_handler(void); void x86_setperf_ipi(struct cpu_info *); extern void (*ipifunc[X86_NIPI])(struct cpu_info *); + +extern void Xxcallintr(void); #endif #endif /* !_LOCORE */ Index: arch/amd64/include/intrdefs.h =================================================================== 
RCS file: /cvs/src/sys/arch/amd64/include/intrdefs.h,v retrieving revision 1.16 diff -u -p -r1.16 intrdefs.h --- arch/amd64/include/intrdefs.h 22 Jun 2016 01:12:38 -0000 1.16 +++ arch/amd64/include/intrdefs.h 6 Mar 2017 00:44:02 -0000 @@ -53,9 +53,10 @@ #define SIR_CLOCK 61 #define SIR_NET 60 #define SIR_TTY 59 +#define SIR_XCALL 58 -#define LIR_XEN 58 -#define LIR_HYPERV 57 +#define LIR_XEN 57 +#define LIR_HYPERV 56 /* * Maximum # of interrupt sources per CPU. 64 to fit in one word. @@ -83,13 +84,14 @@ #define X86_IPI_DDB 0x00000080 #define X86_IPI_START_VMM 0x00000100 #define X86_IPI_STOP_VMM 0x00000200 +#define X86_IPI_XCALL 0x00000400 -#define X86_NIPI 10 +#define X86_NIPI 11 #define X86_IPI_NAMES { "halt IPI", "nop IPI", "FPU flush IPI", \ "FPU synch IPI", "TLB shootdown IPI", \ "MTRR update IPI", "setperf IPI", "ddb IPI", \ - "VMM start IPI", "VMM stop IPI" } + "VMM start IPI", "VMM stop IPI", "xcall IPI" } #define IREENT_MAGIC 0x18041969 Index: arch/amd64/include/mplock.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/mplock.h,v retrieving revision 1.3 diff -u -p -r1.3 mplock.h --- arch/amd64/include/mplock.h 14 Mar 2014 02:08:57 -0000 1.3 +++ arch/amd64/include/mplock.h 6 Mar 2017 00:44:02 -0000 @@ -34,6 +34,7 @@ struct __mp_lock_cpu { struct __mp_lock { struct __mp_lock_cpu mpl_cpus[MAXCPUS]; + struct cpu_info *mpl_owner; volatile u_int mpl_ticket; u_int mpl_users; }; Index: arch/amd64/include/mutex.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/mutex.h,v retrieving revision 1.7 diff -u -p -r1.7 mutex.h --- arch/amd64/include/mutex.h 29 Mar 2014 18:09:28 -0000 1.7 +++ arch/amd64/include/mutex.h 6 Mar 2017 00:44:02 -0000 @@ -30,7 +30,7 @@ struct mutex { int mtx_wantipl; int mtx_oldipl; - volatile void *mtx_owner; + void *mtx_owner; }; /* Index: ddb/db_command.c =================================================================== RCS file: /cvs/src/sys/ddb/db_command.c,v retrieving revision 1.71 diff -u -p -r1.71 db_command.c --- ddb/db_command.c 19 Apr 2016 12:23:25 -0000 1.71 +++ ddb/db_command.c 6 Mar 2017 00:44:02 -0000 @@ -505,6 +505,17 @@ db_extent_print_cmd(db_expr_t addr, int extent_print_all(); } +#ifdef MP_LOCKDEBUG +void _lock_stack_print(int (*)(const char *, ...)); + +/*ARGSUSED*/ +void +db_locks_print_cmd(db_expr_t addr, int have_addr, db_expr_t count, char *modif) +{ + _lock_stack_print(db_printf); +} +#endif + /*ARGSUSED*/ void db_pool_print_cmd(db_expr_t addr, int have_addr, db_expr_t count, char *modif) @@ -562,6 +573,9 @@ struct db_command db_show_cmds[] = { { "breaks", db_listbreak_cmd, 0, NULL }, { "buf", db_buf_print_cmd, 0, NULL }, { "extents", db_extent_print_cmd, 0, NULL }, +#ifdef MP_LOCKDEBUG + { "locks", db_locks_print_cmd, 0, NULL }, +#endif { "malloc", db_malloc_print_cmd, 0, NULL }, { "map", db_map_print_cmd, 0, NULL }, { "mbuf", db_mbuf_print_cmd, 0, NULL }, Index: kern/kern_event.c =================================================================== RCS file: /cvs/src/sys/kern/kern_event.c,v retrieving revision 1.78 diff -u -p -r1.78 kern_event.c --- kern/kern_event.c 11 Feb 2017 19:51:06 -0000 1.78 +++ kern/kern_event.c 6 Mar 2017 00:44:02 -0000 @@ -320,25 +320,57 @@ filt_proc(struct knote *kn, long hint) return (kn->kn_fflags != 0); } +struct kq_timeout { + struct timeout kq_tmo; + struct task kq_t; + struct refcnt kq_refs; + struct cpu_info *kq_ci; +}; + +int lapic_tmo_add(struct cpu_info *, struct task *, int); +int 
lapic_tmo_del(struct cpu_info *, struct task *); + static void filt_timer_timeout_add(struct knote *kn) { - struct timeval tv; - int tticks; + struct kq_timeout *kqtmo = kn->kn_hook; + int rv; + + refcnt_take(&kqtmo->kq_refs); - tv.tv_sec = kn->kn_sdata / 1000; - tv.tv_usec = (kn->kn_sdata % 1000) * 1000; - tticks = tvtohz(&tv); - timeout_add(kn->kn_hook, tticks ? tticks : 1); + if (kn->kn_sdata < 133) { + rv = lapic_tmo_add(kqtmo->kq_ci, &kqtmo->kq_t, + kn->kn_sdata * 1000); + } else { + struct timeval tv; + int tticks; + + tv.tv_sec = kn->kn_sdata / 1000; + tv.tv_usec = (kn->kn_sdata % 1000) * 1000; + tticks = tvtohz(&tv); + + rv = timeout_add(kn->kn_hook, tticks ? tticks : 1); + } + + if (rv == 0) + refcnt_rele(&kqtmo->kq_refs); } void filt_timerexpire(void *knx) { struct knote *kn = knx; + struct kq_timeout *kqtmo = kn->kn_hook; + KERNEL_LOCK(); kn->kn_data++; KNOTE_ACTIVATE(kn); + KERNEL_UNLOCK(); + + if (refcnt_rele_wake(&kqtmo->kq_refs)) { + /* timerdetach is waiting */ + return; + } if ((kn->kn_flags & EV_ONESHOT) == 0) filt_timer_timeout_add(kn); @@ -351,16 +383,22 @@ filt_timerexpire(void *knx) int filt_timerattach(struct knote *kn) { - struct timeout *to; + struct kq_timeout *kqtmo; if (kq_ntimeouts > kq_timeoutmax) return (ENOMEM); kq_ntimeouts++; kn->kn_flags |= EV_CLEAR; /* automatically set */ - to = malloc(sizeof(*to), M_KEVENT, M_WAITOK); - timeout_set(to, filt_timerexpire, kn); - kn->kn_hook = to; + kqtmo = malloc(sizeof(*kqtmo), M_KEVENT, M_WAITOK); + + refcnt_init(&kqtmo->kq_refs); + timeout_set(&kqtmo->kq_tmo, filt_timerexpire, kn); + task_set(&kqtmo->kq_t, filt_timerexpire, kn); + + kqtmo->kq_ci = curcpu(); + + kn->kn_hook = kqtmo; filt_timer_timeout_add(kn); return (0); @@ -369,11 +407,17 @@ filt_timerattach(struct knote *kn) void filt_timerdetach(struct knote *kn) { - struct timeout *to; + struct kq_timeout *kqtmo = kn->kn_hook; + + if (lapic_tmo_del(kqtmo->kq_ci, &kqtmo->kq_t)) + refcnt_rele(&kqtmo->kq_refs); + if (timeout_del(&kqtmo->kq_tmo)) + refcnt_rele(&kqtmo->kq_refs); + + refcnt_finalize(&kqtmo->kq_refs, "kqtmrm"); + + free(kqtmo, M_KEVENT, sizeof(*kqtmo)); - to = (struct timeout *)kn->kn_hook; - timeout_del(to); - free(to, M_KEVENT, sizeof(*to)); kq_ntimeouts--; } Index: kern/kern_lock.c =================================================================== RCS file: /cvs/src/sys/kern/kern_lock.c,v retrieving revision 1.47 diff -u -p -r1.47 kern_lock.c --- kern/kern_lock.c 19 Jun 2016 11:54:33 -0000 1.47 +++ kern/kern_lock.c 6 Mar 2017 00:44:02 -0000 @@ -46,6 +46,9 @@ int __mp_lock_spinout = 200000000; #endif #if defined(MULTIPROCESSOR) +#include +#include + /* * Functions for manipulating the kernel_lock. We put them here * so that they show up in profiles. @@ -82,4 +85,109 @@ _kernel_lock_held(void) { return (__mp_lock_held(&kernel_lock)); } + +#ifdef MP_LOCKDEBUG + +/* + * allocate a lock stack for all possible cpus up front rather than + * use bootable cpumem. the main reason for this is cpus are hatched + * and use locks before we reach a point in boot where we can call + * cpumem_malloc_ncpus. 
+ */ +struct lock_stack _lock_stacks[MAXCPUS]; + +void +lock_idle(void) +{ + struct lock_stack *ls; + + ls = &_lock_stacks[cpu_number()]; + KASSERTMSG(ls->ls_index == 0, + "cpu%d idle with cpu locks held", cpu_number()); +} + +void +lock_check(struct cpu_info *them) +{ + struct cpu_info *self = curcpu(); + struct lock_stack *src; + struct lock_stack *tgt; + unsigned int src_idx, tgt_idx; + vaddr_t lock; + + KASSERTMSG(self != them, "cpu%d: cannot deadlock against self", + CPU_INFO_UNIT(self)); + + src = &_lock_stacks[CPU_INFO_UNIT(self)]; + tgt = &_lock_stacks[CPU_INFO_UNIT(them)]; + + /* look for the lock tgt is trying to acquire */ + tgt_idx = tgt->ls_index; + if (tgt_idx == 0) + return; + + lock = tgt->ls_entries[tgt_idx - 1]; + + /* check to see if we own the lock they want */ + for (src_idx = 0; src_idx < src->ls_index; src_idx++) { + if (tgt->ls_entries[src_idx] != lock) + continue; + + /* we may have a winner */ + + if (tgt->ls_index != tgt_idx || + lock != tgt->ls_entries[tgt_idx - 1]) { + /* tgt has made progress */ + return; + } + + printf("potential deadlock between cpu%u and cpu%u\n", + self->ci_cpuid, them->ci_cpuid); + Debugger(); + } +} + +#include +#include +#include +#include + +static inline const char * +_lock_type_name(unsigned long type) +{ + switch (type) { + case LOCK_TYPE_MPLOCK: + return ("mplock"); + case LOCK_TYPE_MUTEX: + return ("mutex"); + } + + return "(unknown!)"; +} + +void +_lock_stack_print(int (*pr)(const char *, ...)) +{ + unsigned int cpu = cpu_number(); + struct lock_stack *ls; + unsigned int index; + vaddr_t lock; + + ls = &_lock_stacks[cpu]; + + printf("lock stack at %p on cpu%d\n", ls, cpu); + + for (index = 0; index < ls->ls_index; index++) { + lock = ls->ls_entries[index]; + + (*pr)("%u: %s ", index, + _lock_type_name(lock & LOCK_TYPE_MASK)); + db_printsym((db_expr_t)(lock & ~LOCK_TYPE_MASK), + DB_STGY_XTRN, pr); + (*pr)("\n"); + } +} + +#endif /* MP_LOCKDEBUG */ + #endif /* MULTIPROCESSOR */ Index: kern/kern_synch.c =================================================================== RCS file: /cvs/src/sys/kern/kern_synch.c,v retrieving revision 1.138 diff -u -p -r1.138 kern_synch.c --- kern/kern_synch.c 31 Jan 2017 12:16:20 -0000 1.138 +++ kern/kern_synch.c 6 Mar 2017 00:44:02 -0000 @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -207,6 +208,7 @@ msleep(const volatile void *ident, struc */ spl = MUTEX_OLDIPL(mtx); MUTEX_OLDIPL(mtx) = splsched(); + lock_swap(); mtx_leave(mtx); sleep_finish(&sls, 1); @@ -665,11 +667,16 @@ refcnt_rele(struct refcnt *r) return (refcnt == 0); } -void +int refcnt_rele_wake(struct refcnt *r) { - if (refcnt_rele(r)) + int last; + + last = refcnt_rele(r); + if (last) wakeup_one(r); + + return (last); } void Index: kern/kern_task.c =================================================================== RCS file: /cvs/src/sys/kern/kern_task.c,v retrieving revision 1.19 diff -u -p -r1.19 kern_task.c --- kern/kern_task.c 14 Feb 2017 10:31:15 -0000 1.19 +++ kern/kern_task.c 6 Mar 2017 00:44:02 -0000 @@ -23,8 +23,6 @@ #include #include -#define TASK_ONQUEUE 1 - struct taskq { enum { TQ_S_CREATED, @@ -306,3 +304,11 @@ taskq_thread(void *xtq) kthread_exit(0); } + +static inline int +task_heap_cmp(const struct task *a, const struct task *b) +{ + return (a->t_deadline - b->t_deadline); +} + +HEAP_GENERATE(task_heap, task, _t_entry._t_heap, task_heap_cmp); Index: kern/sched_bsd.c =================================================================== RCS file: /cvs/src/sys/kern/sched_bsd.c,v retrieving revision 
1.46 diff -u -p -r1.46 sched_bsd.c --- kern/sched_bsd.c 14 Feb 2017 10:31:15 -0000 1.46 +++ kern/sched_bsd.c 6 Mar 2017 00:44:02 -0000 @@ -47,6 +47,7 @@ #include #include #include +#include #ifdef KTRACE #include @@ -354,9 +355,10 @@ mi_switch(void) * Release the kernel_lock, as we are about to yield the CPU. */ sched_count = __mp_release_all_but_one(&sched_lock); - if (__mp_lock_held(&kernel_lock)) + if (__mp_lock_held(&kernel_lock)) { + lock_swap(); hold_count = __mp_release_all(&kernel_lock); - else + } else hold_count = 0; #endif Index: kern/subr_tree.c =================================================================== RCS file: /cvs/src/sys/kern/subr_tree.c,v retrieving revision 1.6 diff -u -p -r1.6 subr_tree.c --- kern/subr_tree.c 20 Sep 2016 01:11:27 -0000 1.6 +++ kern/subr_tree.c 6 Mar 2017 00:44:02 -0000 @@ -610,3 +610,181 @@ _rb_check(const struct rb_type *t, void (unsigned long)RBE_LEFT(rbe) == poison && (unsigned long)RBE_RIGHT(rbe) == poison); } + +static inline struct _heap_entry * +heap_n2e(const struct _heap_type *t, void *node) +{ + caddr_t addr = (caddr_t)node; + + return ((struct _heap_entry *)(addr + t->t_offset)); +} + +static inline void * +heap_e2n(const struct _heap_type *t, struct _heap_entry *he) +{ + caddr_t addr = (caddr_t)he; + + return ((void *)(addr - t->t_offset)); +} + +static struct _heap_entry * +_heap_merge(const struct _heap_type *t, + struct _heap_entry *he1, struct _heap_entry *he2) +{ + struct _heap_entry *hi, *lo; + struct _heap_entry *child; + + if (he1 == NULL) + return (he2); + if (he2 == NULL) + return (he1); + + if (t->t_compare(heap_e2n(t, he1), heap_e2n(t, he2)) >= 0) { + hi = he1; + lo = he2; + } else { + lo = he1; + hi = he2; + } + + child = lo->he_child; + + hi->he_left = lo; + hi->he_nextsibling = child; + if (child != NULL) + child->he_left = hi; + lo->he_child = hi; + lo->he_left = NULL; + lo->he_nextsibling = NULL; + + return (lo); +} + +static inline void +_heap_sibling_remove(struct _heap_entry *he) +{ + if (he->he_left == NULL) + return; + + if (he->he_left->he_child == he) { + if ((he->he_left->he_child = he->he_nextsibling) != NULL) + he->he_nextsibling->he_left = he->he_left; + } else { + if ((he->he_left->he_nextsibling = he->he_nextsibling) != NULL) + he->he_nextsibling->he_left = he->he_left; + } + + he->he_left = NULL; + he->he_nextsibling = NULL; +} + +static inline struct _heap_entry * +_heap_2pass_merge(const struct _heap_type *t, struct _heap_entry *root) +{ + struct _heap_entry *node, *next = NULL; + struct _heap_entry *tmp, *list = NULL; + + node = root->he_child; + if (node == NULL) + return (NULL); + + root->he_child = NULL; + + /* first pass */ + for (next = node->he_nextsibling; next != NULL; + next = (node != NULL ? 
node->he_nextsibling : NULL)) { + tmp = next->he_nextsibling; + node = _heap_merge(t, node, next); + + /* insert head */ + node->he_nextsibling = list; + list = node; + node = tmp; + } + + /* odd child case */ + if (node != NULL) { + node->he_nextsibling = list; + list = node; + } + + /* second pass */ + while (list->he_nextsibling != NULL) { + tmp = list->he_nextsibling->he_nextsibling; + list = _heap_merge(t, list, list->he_nextsibling); + list->he_nextsibling = tmp; + } + + list->he_left = NULL; + list->he_nextsibling = NULL; + + return (list); +} + +void +_heap_insert(const struct _heap_type *t, struct _heap *h, void *node) +{ + struct _heap_entry *he = heap_n2e(t, node); + + he->he_left = NULL; + he->he_child = NULL; + he->he_nextsibling = NULL; + + h->h_root = _heap_merge(t, h->h_root, he); +} + +void +_heap_remove(const struct _heap_type *t, struct _heap *h, void *node) +{ + struct _heap_entry *he = heap_n2e(t, node); + + if (he->he_left == NULL) { + _heap_extract(t, h); + return; + } + + _heap_sibling_remove(he); + h->h_root = _heap_merge(t, h->h_root, _heap_2pass_merge(t, he)); +} + +void * +_heap_first(const struct _heap_type *t, struct _heap *h) +{ + struct _heap_entry *first = h->h_root; + + if (first == NULL) + return (NULL); + + return (heap_e2n(t, first)); +} + +void * +_heap_extract(const struct _heap_type *t, struct _heap *h) +{ + struct _heap_entry *first = h->h_root; + + if (first == NULL) + return (NULL); + + h->h_root = _heap_2pass_merge(t, first); + + return (heap_e2n(t, first)); +} + +void * +_heap_cextract(const struct _heap_type *t, struct _heap *h, const void *key) +{ + struct _heap_entry *first = h->h_root; + void *node; + + if (first == NULL) + return (NULL); + + node = heap_e2n(t, first); + if (t->t_compare(node, key) > 0) + return (NULL); + + h->h_root = _heap_2pass_merge(t, first); + + return (node); +} Index: net/hfsc.c =================================================================== RCS file: /cvs/src/sys/net/hfsc.c,v retrieving revision 1.35 diff -u -p -r1.35 hfsc.c --- net/hfsc.c 24 Jan 2017 03:57:35 -0000 1.35 +++ net/hfsc.c 6 Mar 2017 00:44:02 -0000 @@ -260,7 +260,7 @@ struct pool hfsc_class_pl, hfsc_internal */ unsigned int hfsc_idx(unsigned int, const struct mbuf *); -int hfsc_enq(struct ifqueue *, struct mbuf *); +struct mbuf *hfsc_enq(struct ifqueue *, struct mbuf *); struct mbuf *hfsc_deq_begin(struct ifqueue *, void **); void hfsc_deq_commit(struct ifqueue *, struct mbuf *, void *); void hfsc_purge(struct ifqueue *, struct mbuf_list *); @@ -650,7 +650,7 @@ hfsc_nextclass(struct hfsc_class *cl) return (cl); } -int +struct mbuf * hfsc_enq(struct ifqueue *ifq, struct mbuf *m) { struct hfsc_if *hif = ifq->ifq_q; @@ -660,14 +660,14 @@ hfsc_enq(struct ifqueue *ifq, struct mbu cl->cl_children != NULL) { cl = hif->hif_defaultclass; if (cl == NULL) - return (ENOBUFS); + return (m); cl->cl_pktattr = NULL; } if (ml_len(&cl->cl_q.q) >= cl->cl_q.qlimit) { /* drop occurred. 
mbuf needs to be freed */ PKTCNTR_INC(&cl->cl_stats.drop_cnt, m->m_pkthdr.len); - return (ENOBUFS); + return (m); } ml_enqueue(&cl->cl_q.q, m); @@ -677,7 +677,7 @@ hfsc_enq(struct ifqueue *ifq, struct mbu if (ml_len(&cl->cl_q.q) == 1) hfsc_set_active(hif, cl, m->m_pkthdr.len); - return (0); + return (NULL); } struct mbuf * Index: net/if_mobileip.c =================================================================== RCS file: net/if_mobileip.c diff -N net/if_mobileip.c Index: net/if_mobileip.h =================================================================== RCS file: net/if_mobileip.h diff -N net/if_mobileip.h Index: net/ifq.c =================================================================== RCS file: /cvs/src/sys/net/ifq.c,v retrieving revision 1.6 diff -u -p -r1.6 ifq.c --- net/ifq.c 24 Jan 2017 03:57:35 -0000 1.6 +++ net/ifq.c 6 Mar 2017 00:44:02 -0000 @@ -29,7 +29,7 @@ * priq glue */ unsigned int priq_idx(unsigned int, const struct mbuf *); -int priq_enq(struct ifqueue *, struct mbuf *); +struct mbuf *priq_enq(struct ifqueue *, struct mbuf *); struct mbuf *priq_deq_begin(struct ifqueue *, void **); void priq_deq_commit(struct ifqueue *, struct mbuf *, void *); void priq_purge(struct ifqueue *, struct mbuf_list *); @@ -70,8 +70,6 @@ void ifq_start_task(void *); void ifq_restart_task(void *); void ifq_barrier_task(void *); -#define TASK_ONQUEUE 0x1 - void ifq_serialize(struct ifqueue *ifq, struct task *t) { @@ -225,7 +223,8 @@ ifq_attach(struct ifqueue *ifq, const st ifq->ifq_q = newq; while ((m = ml_dequeue(&ml)) != NULL) { - if (ifq->ifq_ops->ifqop_enq(ifq, m) != 0) { + m = ifq->ifq_ops->ifqop_enq(ifq, m); + if (m != NULL) { ifq->ifq_qdrops++; ml_enqueue(&free_ml, m); } else @@ -252,13 +251,14 @@ ifq_destroy(struct ifqueue *ifq) } int -ifq_enqueue_try(struct ifqueue *ifq, struct mbuf *m) +ifq_enqueue(struct ifqueue *ifq, struct mbuf *m) { + struct mbuf *dm; int rv; mtx_enter(&ifq->ifq_mtx); - rv = ifq->ifq_ops->ifqop_enq(ifq, m); - if (rv == 0) { + dm = ifq->ifq_ops->ifqop_enq(ifq, m); + if (dm == NULL) { ifq->ifq_len++; ifq->ifq_packets++; @@ -269,19 +269,14 @@ ifq_enqueue_try(struct ifqueue *ifq, str ifq->ifq_qdrops++; mtx_leave(&ifq->ifq_mtx); - return (rv); -} - -int -ifq_enqueue(struct ifqueue *ifq, struct mbuf *m) -{ - int err; - - err = ifq_enqueue_try(ifq, m); - if (err != 0) - m_freem(m); + if (dm == NULL) + rv = 0; + else { + m_freem(dm); + rv = ENOBUFS; + } - return (err); + return (rv); } struct mbuf * @@ -403,14 +398,14 @@ priq_free(unsigned int idx, void *pq) free(pq, M_DEVBUF, sizeof(struct priq)); } -int +struct mbuf * priq_enq(struct ifqueue *ifq, struct mbuf *m) { struct priq *pq; struct priq_list *pl; if (ifq_len(ifq) >= ifq->ifq_maxlen) - return (ENOBUFS); + return (m); pq = ifq->ifq_q; KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO); @@ -423,7 +418,7 @@ priq_enq(struct ifqueue *ifq, struct mbu pl->tail->m_nextpkt = m; pl->tail = m; - return (0); + return (NULL); } struct mbuf * Index: net/ifq.h =================================================================== RCS file: /cvs/src/sys/net/ifq.h,v retrieving revision 1.9 diff -u -p -r1.9 ifq.h --- net/ifq.h 24 Jan 2017 10:08:30 -0000 1.9 +++ net/ifq.h 6 Mar 2017 00:44:02 -0000 @@ -117,25 +117,21 @@ struct ifqueue { * the ifqueue. All the pending mbufs are removed from the previous * conditioner and requeued on the new. * - * === ifq_enqueue() and ifq_enqueue_try() + * === ifq_enqueue() * - * ifq_enqueue() and ifq_enqueue_try() attempt to fit an mbuf onto the - * ifqueue. 
If the current traffic conditioner rejects the packet it - * wont be queued and will be counted as a drop. ifq_enqueue() will - * free the mbuf on the callers behalf if the packet is rejected. - * ifq_enqueue_try() does not free the mbuf, allowing the caller to - * reuse it. + * ifq_enqueue() attempts to fit an mbuf onto the ifqueue. The + * current traffic conditioner may drop a packet to make space on the + * queue. * * === ifq_start() * - * Once a packet has been successfully queued with ifq_enqueue() or - * ifq_enqueue_try(), the network card is notified with a call to - * if_start(). If an interface is marked with IFXF_MPSAFE in its - * if_xflags field, if_start() calls ifq_start() to dispatch the - * interfaces start routine. Calls to ifq_start() run in the ifqueue - * serialisation context, guaranteeing that only one instance of - * ifp->if_start() will be running in the system at any point in time. - * + * Once a packet has been successfully queued with ifq_enqueue(), + * the network card is notified with a call to if_start(). If an + * interface is marked with IFXF_MPSAFE in its if_xflags field, + * if_start() calls ifq_start() to dispatch the interfaces start + * routine. Calls to ifq_start() run in the ifqueue serialisation + * context, guaranteeing that only one instance of ifp->if_start() + * will be running in the system at any point in time. * * == Traffic conditioners API * @@ -324,7 +320,7 @@ struct ifqueue { struct ifq_ops { unsigned int (*ifqop_idx)(unsigned int, const struct mbuf *); - int (*ifqop_enq)(struct ifqueue *, struct mbuf *); + struct mbuf *(*ifqop_enq)(struct ifqueue *, struct mbuf *); struct mbuf *(*ifqop_deq_begin)(struct ifqueue *, void **); void (*ifqop_deq_commit)(struct ifqueue *, struct mbuf *, void *); @@ -341,7 +337,6 @@ struct ifq_ops { void ifq_init(struct ifqueue *, struct ifnet *, unsigned int); void ifq_attach(struct ifqueue *, const struct ifq_ops *, void *); void ifq_destroy(struct ifqueue *); -int ifq_enqueue_try(struct ifqueue *, struct mbuf *); int ifq_enqueue(struct ifqueue *, struct mbuf *); struct mbuf *ifq_deq_begin(struct ifqueue *); void ifq_deq_commit(struct ifqueue *, struct mbuf *); Index: sys/mplockdebug.h =================================================================== RCS file: sys/mplockdebug.h diff -N sys/mplockdebug.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/mplockdebug.h 6 Mar 2017 00:44:02 -0000 @@ -0,0 +1,105 @@ +/* $OpenBSD: mplock.h,v 1.9 2007/11/26 17:15:29 art Exp $ */ + +/* + * Copyright (c) 2004 Niklas Hallqvist. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_MPLOCK_DEBUG_H_ +#define _SYS_MPLOCK_DEBUG_H_ + +#ifdef MP_LOCKDEBUG + +#include /* for CACHELINESIZE */ + +#define LOCK_STACK 7 + +/* this gets stuffed in the low bits of the stack values */ +#define LOCK_TYPE_MASK 1UL +#define LOCK_TYPE_MUTEX 0UL +#define LOCK_TYPE_MPLOCK 1UL + +struct lock_stack { + vaddr_t ls_entries[LOCK_STACK]; + unsigned int ls_index; +} __aligned(CACHELINESIZE); + +static inline void +lock_enter(void *lock, unsigned long type) +{ + extern struct lock_stack _lock_stacks[]; + struct lock_stack *ls; + unsigned int index; + + ls = &_lock_stacks[cpu_number()]; + index = ls->ls_index++; + KASSERTMSG(index < LOCK_STACK, "too many locks"); + ls->ls_entries[index] = ((vaddr_t)lock | type); +} + +static inline void +lock_swap(void) +{ + extern struct lock_stack _lock_stacks[]; + struct lock_stack *ls; + unsigned int index; + vaddr_t *entries; + vaddr_t tmp; + + ls = &_lock_stacks[cpu_number()]; + index = ls->ls_index; + KASSERTMSG(index >= 2, "not enough locks to swap"); + index -= 2; + entries = &ls->ls_entries[index]; + + tmp = entries[0]; + entries[0] = entries[1]; + entries[1] = tmp; +} + +static inline void +lock_leave(void *lock, unsigned long type) +{ + extern struct lock_stack _lock_stacks[]; + struct lock_stack *ls; + unsigned int index; + + ls = &_lock_stacks[cpu_number()]; + index = ls->ls_index - 1; + if (ls->ls_entries[index] != ((vaddr_t)lock | type)) + panic("lock %p released out of order", lock); + ls->ls_index = index; +} + +void lock_check(struct cpu_info *); +void lock_idle(void); + +#else /* MP_LOCKDEBUG */ + +#define lock_enter(_lock, _type) /* nothing */ +#define lock_leave(_lock, _type) /* nothing */ +#define lock_swap() /* nothing */ +#define lock_idle() /* nothing */ + +#endif /* MP_LOCKDEBUG */ + +#endif /* _SYS_MPLOCKDEBUG_H_ */ Index: sys/refcnt.h =================================================================== RCS file: /cvs/src/sys/sys/refcnt.h,v retrieving revision 1.4 diff -u -p -r1.4 refcnt.h --- sys/refcnt.h 7 Jun 2016 07:53:33 -0000 1.4 +++ sys/refcnt.h 6 Mar 2017 00:44:02 -0000 @@ -30,7 +30,7 @@ struct refcnt { void refcnt_init(struct refcnt *); void refcnt_take(struct refcnt *); int refcnt_rele(struct refcnt *); -void refcnt_rele_wake(struct refcnt *); +int refcnt_rele_wake(struct refcnt *); void refcnt_finalize(struct refcnt *, const char *); #endif /* _KERNEL */ Index: sys/systm.h =================================================================== RCS file: /cvs/src/sys/sys/systm.h,v retrieving revision 1.124 diff -u -p -r1.124 systm.h --- sys/systm.h 14 Feb 2017 09:46:21 -0000 1.124 +++ sys/systm.h 6 Mar 2017 00:44:02 -0000 @@ -356,6 +356,10 @@ void user_config(void); #endif #if defined(MULTIPROCESSOR) +struct cpu_info; +struct task; +void cpu_xcall(struct cpu_info *, struct task *); + void _kernel_lock_init(void); void _kernel_lock(void); void _kernel_unlock(void); Index: sys/task.h =================================================================== RCS file: /cvs/src/sys/sys/task.h,v retrieving revision 
1.11 diff -u -p -r1.11 task.h --- sys/task.h 7 Jun 2016 07:53:33 -0000 1.11 +++ sys/task.h 6 Mar 2017 00:44:02 -0000 @@ -20,26 +20,47 @@ #define _SYS_TASK_H_ #include +#include struct taskq; struct task { - TAILQ_ENTRY(task) t_entry; + union { + TAILQ_ENTRY(task) _t_list; + HEAP_ENTRY(task) _t_heap; + } _t_entry; +#define t_entry _t_entry._t_list + void (*t_func)(void *); void *t_arg; + unsigned int t_flags; + int t_deadline; }; TAILQ_HEAD(task_list, task); +HEAP_HEAD(task_heap); + +#define TASK_ONQUEUE 2 /* task is on the todo queue */ +#define TASK_INITIALIZED 4 /* task is initialized */ #define TASKQ_MPSAFE (1 << 0) #define TASKQ_CANTSLEEP (1 << 1) -#define TASK_INITIALIZER(_f, _a) {{ NULL, NULL }, (_f), (_a), 0 } +#define task_pending(_t) ((_t)->t_flags & TASK_ONQUEUE) +#define task_initialized(_t) ((_t)->t_flags & TASK_INITIALIZED) #ifdef _KERNEL +HEAP_PROTOTYPE(task_heap, task); + extern struct taskq *const systq; extern struct taskq *const systqmp; + +#define TASK_INITIALIZER(_f, _a) { \ + .t_func = (_f), \ + .t_arg = (_a), \ + .t_flags = TASK_INITIALIZED, \ +} struct taskq *taskq_create(const char *, unsigned int, int, unsigned int); void taskq_destroy(struct taskq *); Index: sys/tree.h =================================================================== RCS file: /cvs/src/sys/sys/tree.h,v retrieving revision 1.25 diff -u -p -r1.25 tree.h --- sys/tree.h 26 Sep 2016 08:08:51 -0000 1.25 +++ sys/tree.h 6 Mar 2017 00:44:02 -0000 @@ -984,4 +984,116 @@ RBT_GENERATE_INTERNAL(_name, _type, _fie #endif /* _KERNEL */ +struct _heap_type { + int (*t_compare)(const void *, const void *); + unsigned int t_offset; /* offset of heap_entry in type */ +}; + +struct _heap_entry { + struct _heap_entry *he_left; + struct _heap_entry *he_child; + struct _heap_entry *he_nextsibling; +}; +#define HEAP_ENTRY(_entry) struct _heap_entry + +struct _heap { + struct _heap_entry *h_root; +}; + +#define HEAP_HEAD(_name) \ +struct _name { \ + struct _heap heap; \ +} + +#ifdef _KERNEL + +static inline void +_heap_init(struct _heap *h) +{ + h->h_root = NULL; +} + +static inline int +_heap_empty(struct _heap *h) +{ + return (h->h_root == NULL); +} + +void _heap_insert(const struct _heap_type *, struct _heap *, void *); +void _heap_remove(const struct _heap_type *, struct _heap *, void *); +void *_heap_first(const struct _heap_type *, struct _heap *); +void *_heap_extract(const struct _heap_type *, struct _heap *); +void *_heap_cextract(const struct _heap_type *, struct _heap *, + const void *); + +#define HEAP_INITIALIZER(_head) { { NULL } } + +#define HEAP_PROTOTYPE(_name, _type) \ +extern const struct _heap_type *const _name##_HEAP_TYPE; \ + \ +static inline void \ +_name##_HEAP_INIT(struct _name *head) \ +{ \ + _heap_init(&head->heap); \ +} \ + \ +static inline void \ +_name##_HEAP_INSERT(struct _name *head, struct _type *elm) \ +{ \ + _heap_insert(_name##_HEAP_TYPE, &head->heap, elm); \ +} \ + \ +static inline void \ +_name##_HEAP_REMOVE(struct _name *head, struct _type *elm) \ +{ \ + _heap_remove(_name##_HEAP_TYPE, &head->heap, elm); \ +} \ + \ +static inline struct _type * \ +_name##_HEAP_FIRST(struct _name *head) \ +{ \ + return _heap_first(_name##_HEAP_TYPE, &head->heap); \ +} \ + \ +static inline struct _type * \ +_name##_HEAP_EXTRACT(struct _name *head) \ +{ \ + return _heap_extract(_name##_HEAP_TYPE, &head->heap); \ +} \ + \ +static inline struct _type * \ +_name##_HEAP_CEXTRACT(struct _name *head, const struct _type *key) \ +{ \ + return _heap_cextract(_name##_HEAP_TYPE, &head->heap, key); \ +} \ + \ 
+static inline int \ +_name##_HEAP_EMPTY(struct _name *head) \ +{ \ + return _heap_empty(&head->heap); \ +} + +#define HEAP_GENERATE(_name, _type, _field, _cmp) \ +static int \ +_name##_HEAP_COMPARE(const void *lptr, const void *rptr) \ +{ \ + const struct _type *l = lptr, *r = rptr; \ + return _cmp(l, r); \ +} \ +static const struct _heap_type _name##_HEAP_INFO = { \ + _name##_HEAP_COMPARE, \ + offsetof(struct _type, _field), \ +}; \ +const struct _heap_type *const _name##_HEAP_TYPE = &_name##_HEAP_INFO + +#define HEAP_INIT(_name, _h) _name##_HEAP_INIT((_h)) +#define HEAP_INSERT(_name, _h, _e) _name##_HEAP_INSERT((_h), (_e)) +#define HEAP_REMOVE(_name, _h, _e) _name##_HEAP_REMOVE((_h), (_e)) +#define HEAP_FIRST(_name, _h) _name##_HEAP_FIRST((_h)) +#define HEAP_EXTRACT(_name, _h) _name##_HEAP_EXTRACT((_h)) +#define HEAP_CEXTRACT(_name, _h, _k) _name##_HEAP_CEXTRACT((_h), (_k)) +#define HEAP_EMPTY(_name, _h) _name##_HEAP_EMPTY((_h)) + +#endif /* _KERNEL */ + #endif /* _SYS_TREE_H_ */ Index: ufs/ufs/ufs_dirhash.c =================================================================== RCS file: /cvs/src/sys/ufs/ufs/ufs_dirhash.c,v retrieving revision 1.38 diff -u -p -r1.38 ufs_dirhash.c --- ufs/ufs/ufs_dirhash.c 15 Sep 2016 02:00:18 -0000 1.38 +++ ufs/ufs/ufs_dirhash.c 6 Mar 2017 00:44:02 -0000 @@ -345,11 +345,11 @@ ufsdirhash_lookup(struct inode *ip, char TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh, dh_list); } + DIRHASH_UNLOCK(dh); DIRHASHLIST_UNLOCK(); - } else { - /* Already the last, though that could change as we wait. */ - DIRHASH_LOCK(dh); } + DIRHASH_LOCK(dh); + if (dh->dh_hash == NULL) { DIRHASH_UNLOCK(dh); ufsdirhash_free(ip);
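
For reference, a minimal sketch (not part of the diff) of how the cpu_xcall() interface added above is intended to be used. frobnicate() and example() are made-up names; the caller must keep the task allocated until it has run on the target cpu, where it executes from the SIR_XCALL soft interrupt at IPL_SOFTCLOCK (or immediately at splsoftclock() if the target is the local cpu):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/task.h>

#include <machine/cpu.h>

void	frobnicate(void *);
void	example(struct cpu_info *);

void
frobnicate(void *arg)
{
	/* runs on the target cpu from its SIR_XCALL soft interrupt */
	printf("xcall running on cpu%u\n", CPU_INFO_UNIT(curcpu()));
}

void
example(struct cpu_info *ci)
{
	/* must stay valid until it has run on ci, hence static here */
	static struct task t = TASK_INITIALIZER(frobnicate, NULL);

	cpu_xcall(ci, &t);
}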