? arch/amd64/conf/DEBUG.MP ? arch/amd64/conf/PROFILED Index: arch/amd64/conf/GENERIC =================================================================== RCS file: /cvs/src/sys/arch/amd64/conf/GENERIC,v retrieving revision 1.401 diff -u -p -r1.401 GENERIC --- arch/amd64/conf/GENERIC 23 Nov 2015 22:57:12 -0000 1.401 +++ arch/amd64/conf/GENERIC 27 Nov 2015 03:55:51 -0000 @@ -9,7 +9,9 @@ # of each device driver in this file see the section 4 man page for the # device. + machine amd64 +makeoptions DEBUG="-g" include "../../../conf/GENERIC" maxusers 80 # estimated number of users @@ -108,7 +110,7 @@ uguru0 at isa? disable port 0xe0 # ABIT aps0 at isa? port 0x1600 # ThinkPad Active Protection System asmc0 at isa? port 0x300 # Apple SMC -piixpm* at pci? # Intel PIIX PM +piixpm* at pci? disable # Intel PIIX PM iic* at piixpm? ichiic* at pci? # Intel ICH SMBus controller iic* at ichiic? Index: conf/GENERIC =================================================================== RCS file: /cvs/src/sys/conf/GENERIC,v retrieving revision 1.222 diff -u -p -r1.222 GENERIC --- conf/GENERIC 24 Oct 2015 10:52:05 -0000 1.222 +++ conf/GENERIC 27 Nov 2015 03:55:53 -0000 @@ -3,6 +3,8 @@ # Machine-independent option; used by all architectures for their # GENERIC kernel +option MSGBUFSIZE="64*NBPG" + #option INSECURE # default to secure option DDB # in-kernel debugger Index: dev/pci/if_myx.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_myx.c,v retrieving revision 1.88 diff -u -p -r1.88 if_myx.c --- dev/pci/if_myx.c 25 Nov 2015 03:09:59 -0000 1.88 +++ dev/pci/if_myx.c 27 Nov 2015 03:55:54 -0000 @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include @@ -512,6 +512,7 @@ myx_attachhook(void *arg) ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_xflags = IFXF_MPSAFE; ifp->if_ioctl = myx_ioctl; ifp->if_start = myx_start; ifp->if_watchdog = myx_watchdog; @@ -1437,11 +1438,6 
@@ myx_start(struct ifnet *ifp) u_int free, used; u_int8_t flags; - if (!ISSET(ifp->if_flags, IFF_RUNNING) || - ifq_is_oactive(&ifp->if_snd) || - IFQ_IS_EMPTY(&ifp->if_snd)) - return; - idx = sc->sc_tx_ring_prod; /* figure out space */ @@ -1589,11 +1585,10 @@ int myx_intr(void *arg) { struct myx_softc *sc = (struct myx_softc *)arg; - struct ifnet *ifp = &sc->sc_ac.ac_if; volatile struct myx_status *sts = sc->sc_sts; enum myx_state state; bus_dmamap_t map = sc->sc_sts_dma.mxm_map; - u_int32_t data, start; + u_int32_t data; u_int8_t valid = 0; state = sc->sc_state; @@ -1639,15 +1634,12 @@ myx_intr(void *arg) bus_space_write_raw_region_4(sc->sc_memt, sc->sc_memh, sc->sc_irqclaimoff + sizeof(data), &data, sizeof(data)); - start = ifq_is_oactive(&ifp->if_snd); - if (sts->ms_statusupdated) { if (state == MYX_S_DOWN && sc->sc_linkdown != sts->ms_linkdown) { sc->sc_state = MYX_S_OFF; membar_producer(); wakeup(sts); - start = 0; } else { data = sts->ms_linkstate; if (data != 0xffffffff) { @@ -1661,13 +1653,6 @@ myx_intr(void *arg) bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize, BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE); - if (start) { - KERNEL_LOCK(); - ifq_clr_oactive(&ifp->if_snd); - myx_start(ifp); - KERNEL_UNLOCK(); - } - return (1); } @@ -1716,6 +1701,10 @@ myx_txeof(struct myx_softc *sc, u_int32_ sc->sc_tx_ring_cons = idx; sc->sc_tx_cons = cons; + + ifq_clr_oactive(&ifp->if_snd); + if (!ifq_empty(&ifp->if_snd)) + if_start(ifp); } void @@ -1865,23 +1854,6 @@ destroy: } free(mrr->mrr_slots, M_DEVBUF, sizeof(*ms) * sc->sc_rx_ring_count); return (rv); -} - -static inline int -myx_rx_ring_enter(struct myx_rx_ring *mrr) -{ - return (atomic_inc_int_nv(&mrr->mrr_running) == 1); -} - -static inline int -myx_rx_ring_leave(struct myx_rx_ring *mrr) -{ - if (atomic_cas_uint(&mrr->mrr_running, 1, 0) == 1) - return (1); - - mrr->mrr_running = 1; - - return (0); } int Index: kern/kern_exit.c =================================================================== RCS file: 
/cvs/src/sys/kern/kern_exit.c,v retrieving revision 1.154 diff -u -p -r1.154 kern_exit.c --- kern/kern_exit.c 9 Oct 2015 01:10:27 -0000 1.154 +++ kern/kern_exit.c 27 Nov 2015 03:55:55 -0000 @@ -426,6 +426,8 @@ reaper(void) { struct proc *p; + sched_peg_curproc(&cpu_info_primary); + KERNEL_UNLOCK(); SCHED_ASSERT_UNLOCKED(); Index: kern/kern_malloc.c =================================================================== RCS file: /cvs/src/sys/kern/kern_malloc.c,v retrieving revision 1.128 diff -u -p -r1.128 kern_malloc.c --- kern/kern_malloc.c 14 Mar 2015 03:38:50 -0000 1.128 +++ kern/kern_malloc.c 27 Nov 2015 03:55:55 -0000 @@ -166,19 +166,6 @@ malloc(size_t size, int type, int flags) KASSERT(flags & (M_WAITOK | M_NOWAIT)); - if ((flags & M_NOWAIT) == 0) { - extern int pool_debug; -#ifdef DIAGNOSTIC - assertwaitok(); - if (pool_debug == 2) - yield(); -#endif - if (!cold && pool_debug) { - KERNEL_UNLOCK(); - KERNEL_LOCK(); - } - } - #ifdef MALLOC_DEBUG if (debug_malloc(size, type, flags, (void **)&va)) { if ((flags & M_ZERO) && va != NULL) Index: kern/kern_task.c =================================================================== RCS file: /cvs/src/sys/kern/kern_task.c,v retrieving revision 1.15 diff -u -p -r1.15 kern_task.c --- kern/kern_task.c 19 Nov 2015 13:19:24 -0000 1.15 +++ kern/kern_task.c 27 Nov 2015 03:55:55 -0000 @@ -22,6 +22,7 @@ #include #include #include +#include #define TASK_ONQUEUE 1 @@ -68,6 +69,7 @@ struct taskq *const systqmp = &taskq_sys void taskq_init(void); /* called in init_main.c */ void taskq_create_thread(void *); +void taskq_barrier_task(void *); int taskq_sleep(const volatile void *, struct mutex *, int, const char *, int); int taskq_next_work(struct taskq *, struct task *, sleepfn); @@ -176,6 +178,30 @@ taskq_create_thread(void *arg) } while (tq->tq_running < tq->tq_nthreads); mtx_leave(&tq->tq_mtx); +} + +void +taskq_barrier(struct taskq *tq) +{ + struct sleep_state sls; + unsigned int notdone = 1; + struct task t = 
TASK_INITIALIZER(taskq_barrier_task, &notdone); + + task_add(tq, &t); + + while (notdone) { + sleep_setup(&sls, &notdone, PWAIT, "tqbar"); + sleep_finish(&sls, notdone); + } +} + +void +taskq_barrier_task(void *p) +{ + unsigned int *notdone = p; + + *notdone = 0; + wakeup_one(notdone); } void Index: kern/uipc_mbuf.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v retrieving revision 1.214 diff -u -p -r1.214 uipc_mbuf.c --- kern/uipc_mbuf.c 21 Nov 2015 11:46:24 -0000 1.214 +++ kern/uipc_mbuf.c 27 Nov 2015 03:55:55 -0000 @@ -125,8 +125,8 @@ void nmbclust_update(void); void m_zero(struct mbuf *); -const char *mclpool_warnmsg = - "WARNING: mclpools limit reached; increase kern.maxclusters"; +const char *mbufpl_warnmsg = + "WARNING: mbuf limit reached; increase kern.maxclusters"; /* * Initialize the mbuf allocator. @@ -159,7 +159,6 @@ mbinit(void) mclnames[i], NULL); pool_setipl(&mclpools[i], IPL_NET); pool_set_constraints(&mclpools[i], &kp_dma_contig); - pool_setlowat(&mclpools[i], mcllowat); } nmbclust_update(); @@ -168,25 +167,7 @@ mbinit(void) void nmbclust_update(void) { - int i; - /* - * Set the hard limit on the mclpools to the number of - * mbuf clusters the kernel is to support. Log the limit - * reached message max once a minute. - */ - for (i = 0; i < nitems(mclsizes); i++) { - (void)pool_sethardlimit(&mclpools[i], nmbclust, - mclpool_warnmsg, 60); - /* - * XXX this needs to be reconsidered. - * Setting the high water mark to nmbclust is too high - * but we need to have enough spare buffers around so that - * allocations in interrupt context don't fail or mclgeti() - * drivers may end up with empty rings. 
- */ - pool_sethiwat(&mclpools[i], nmbclust); - } - pool_sethiwat(&mbpool, nmbclust); + (void)pool_sethardlimit(&mbpool, nmbclust, mbufpl_warnmsg, 60); } /* Index: net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.413 diff -u -p -r1.413 if.c --- net/if.c 25 Nov 2015 03:10:00 -0000 1.413 +++ net/if.c 27 Nov 2015 03:55:55 -0000 @@ -81,6 +81,7 @@ #include #include #include +#include #include @@ -152,6 +153,9 @@ void if_input_process(void *); void ifa_print_all(void); #endif +void if_start_mpsafe(struct ifnet *ifp); +void if_start_locked(struct ifnet *ifp); + /* * interface index map * @@ -535,30 +539,90 @@ if_attach_common(struct ifnet *ifp) void if_start(struct ifnet *ifp) { + if (ISSET(ifp->if_xflags, IFXF_MPSAFE)) + if_start_mpsafe(ifp); + else + if_start_locked(ifp); +} + +void +if_start_locked(struct ifnet *ifp) +{ + int s; + + KERNEL_LOCK(); + s = splnet(); + ifp->if_start(ifp); + splx(s); + KERNEL_UNLOCK(); +} + +static inline unsigned int +ifq_enter(struct ifqueue *ifq) +{ + return (atomic_inc_int_nv(&ifq->ifq_serializer) == 1); +} + +static inline unsigned int +ifq_leave(struct ifqueue *ifq) +{ + if (atomic_cas_uint(&ifq->ifq_serializer, 1, 0) == 1) + return (1); - splassert(IPL_NET); + ifq->ifq_serializer = 1; - if (ifq_len(&ifp->if_snd) >= min(8, ifp->if_snd.ifq_maxlen) && - !ifq_is_oactive(&ifp->if_snd)) { - if (ISSET(ifp->if_xflags, IFXF_TXREADY)) { - TAILQ_REMOVE(&iftxlist, ifp, if_txlist); - CLR(ifp->if_xflags, IFXF_TXREADY); + return (0); +} + +void +if_start_mpsafe(struct ifnet *ifp) +{ + struct ifqueue *ifq = &ifp->if_snd; + + if (!ifq_enter(ifq)) + return; + + do { + if (__predict_false(!ISSET(ifp->if_flags, IFF_RUNNING))) { + ifq->ifq_serializer = 0; + wakeup_one(&ifq->ifq_serializer); + return; } + + if (ifq_empty(ifq) || ifq_is_oactive(ifq)) + continue; + ifp->if_start(ifp); + + } while (!ifq_leave(ifq)); +} + +void +if_start_barrier(struct ifnet *ifp) +{ + struct 
sleep_state sls; + struct ifqueue *ifq = &ifp->if_snd; + + /* this should only be called from converted drivers */ + KASSERT(ISSET(ifp->if_xflags, IFXF_MPSAFE)); + + /* drivers should only call this on the way down */ + KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING)); + + if (ifq->ifq_serializer == 0) return; - } - if (!ISSET(ifp->if_xflags, IFXF_TXREADY)) { - SET(ifp->if_xflags, IFXF_TXREADY); - TAILQ_INSERT_TAIL(&iftxlist, ifp, if_txlist); - schednetisr(NETISR_TX); - } + if_start_mpsafe(ifp); /* spin the wheel to guarantee a wakeup */ + do { + sleep_setup(&sls, &ifq->ifq_serializer, PWAIT, "ifbar"); + sleep_finish(&sls, ifq->ifq_serializer != 0); + } while (ifq->ifq_serializer != 0); } int if_enqueue(struct ifnet *ifp, struct mbuf *m) { - int s, length, error = 0; + int length, error = 0; unsigned short mflags; #if NBRIDGE > 0 @@ -569,17 +633,13 @@ if_enqueue(struct ifnet *ifp, struct mbu length = m->m_pkthdr.len; mflags = m->m_flags; - s = splnet(); - /* * Queue message on interface, and start output if interface * not yet active. */ IFQ_ENQUEUE(&ifp->if_snd, m, error); - if (error) { - splx(s); + if (error) return (error); - } ifp->if_obytes += length; if (mflags & M_MCAST) @@ -587,8 +647,6 @@ if_enqueue(struct ifnet *ifp, struct mbu if_start(ifp); - splx(s); - return (0); } @@ -808,21 +866,6 @@ if_input_process(void *xmq) } void -nettxintr(void) -{ - struct ifnet *ifp; - int s; - - s = splnet(); - while ((ifp = TAILQ_FIRST(&iftxlist)) != NULL) { - TAILQ_REMOVE(&iftxlist, ifp, if_txlist); - CLR(ifp->if_xflags, IFXF_TXREADY); - ifp->if_start(ifp); - } - splx(s); -} - -void if_deactivate(struct ifnet *ifp) { int s; @@ -902,8 +945,6 @@ if_detach(struct ifnet *ifp) /* Remove the interface from the list of all interfaces. 
*/ TAILQ_REMOVE(&ifnet, ifp, if_list); - if (ISSET(ifp->if_xflags, IFXF_TXREADY)) - TAILQ_REMOVE(&iftxlist, ifp, if_txlist); while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL) if_delgroup(ifp, ifg->ifgl_group->ifg_group); Index: net/if.h =================================================================== RCS file: /cvs/src/sys/net/if.h,v retrieving revision 1.173 diff -u -p -r1.173 if.h --- net/if.h 20 Nov 2015 12:27:42 -0000 1.173 +++ net/if.h 27 Nov 2015 03:55:55 -0000 @@ -206,14 +206,14 @@ struct if_status_description { (IFF_BROADCAST|IFF_POINTOPOINT|IFF_RUNNING|IFF_OACTIVE|\ IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI) -#define IFXF_TXREADY 0x1 /* interface is ready to tx */ +#define IFXF_MPSAFE 0x1 /* if_start is mpsafe */ #define IFXF_INET6_NOPRIVACY 0x4 /* don't autoconf privacy */ #define IFXF_MPLS 0x8 /* supports MPLS */ #define IFXF_WOL 0x10 /* wake on lan enabled */ #define IFXF_AUTOCONF6 0x20 /* v6 autoconf enabled */ #define IFXF_CANTCHANGE \ - (IFXF_TXREADY) + (IFXF_MPSAFE) /* * Some convenience macros used for setting ifi_baudrate. 
Index: net/if_var.h =================================================================== RCS file: /cvs/src/sys/net/if_var.h,v retrieving revision 1.58 diff -u -p -r1.58 if_var.h --- net/if_var.h 25 Nov 2015 03:10:00 -0000 1.58 +++ net/if_var.h 27 Nov 2015 03:55:55 -0000 @@ -134,7 +134,6 @@ struct ifnet { /* and the entries */ void *if_softc; /* lower-level data for this if */ struct refcnt if_refcnt; TAILQ_ENTRY(ifnet) if_list; /* all struct ifnets are chained */ - TAILQ_ENTRY(ifnet) if_txlist; /* list of ifnets ready to tx */ TAILQ_HEAD(, ifaddr) if_addrlist; /* linked list of addresses per if */ TAILQ_HEAD(, ifmaddr) if_maddrlist; /* list of multicast records */ TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */ @@ -369,6 +368,7 @@ extern struct ifnet_head ifnet; extern unsigned int lo0ifidx; void if_start(struct ifnet *); +void if_start_barrier(struct ifnet *); int if_enqueue_try(struct ifnet *, struct mbuf *); int if_enqueue(struct ifnet *, struct mbuf *); void if_input(struct ifnet *, struct mbuf_list *); Index: net/netisr.c =================================================================== RCS file: /cvs/src/sys/net/netisr.c,v retrieving revision 1.7 diff -u -p -r1.7 netisr.c --- net/netisr.c 20 Jul 2015 21:16:39 -0000 1.7 +++ net/netisr.c 27 Nov 2015 03:55:55 -0000 @@ -68,8 +68,6 @@ netintr(void *unused) /* ARGSUSED */ if (t & (1 << NETISR_PFSYNC)) pfsyncintr(); #endif - if (t & (1 << NETISR_TX)) - nettxintr(); } void Index: netinet/ip_input.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_input.c,v retrieving revision 1.262 diff -u -p -r1.262 ip_input.c --- netinet/ip_input.c 23 Nov 2015 15:54:45 -0000 1.262 +++ netinet/ip_input.c 27 Nov 2015 03:55:55 -0000 @@ -191,13 +191,11 @@ struct route ipforward_rt; void ipintr(void) { + struct mbuf_list ml; struct mbuf *m; - /* - * Get next datagram off input queue and get IP header - * in first mbuf. 
- */ - while ((m = niq_dequeue(&ipintrq)) != NULL) { + niq_delist(&ipintrq, &ml); + while ((m = ml_dequeue(&ml)) != NULL) { #ifdef DIAGNOSTIC if ((m->m_flags & M_PKTHDR) == 0) panic("ipintr no HDR"); Index: sys/task.h =================================================================== RCS file: /cvs/src/sys/sys/task.h,v retrieving revision 1.8 diff -u -p -r1.8 task.h --- sys/task.h 9 Feb 2015 03:15:41 -0000 1.8 +++ sys/task.h 27 Nov 2015 03:55:55 -0000 @@ -39,6 +39,7 @@ extern struct taskq *const systqmp; struct taskq *taskq_create(const char *, unsigned int, int, unsigned int); void taskq_destroy(struct taskq *); +void taskq_barrier(struct taskq *); void task_set(struct task *, void (*)(void *), void *); int task_add(struct taskq *, struct task *);