Index: crypto/crypto.c =================================================================== RCS file: /cvs/src/sys/crypto/crypto.c,v retrieving revision 1.72 diff -u -p -r1.72 crypto.c --- crypto/crypto.c 23 Oct 2014 00:15:09 -0000 1.72 +++ crypto/crypto.c 7 Jan 2015 03:37:52 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: crypto.c,v 1.72 2014/10/23 00:15:09 dlg Exp $ */ +/* $OpenBSD: crypto.c,v 1.71 2014/10/23 00:11:48 dlg Exp $ */ /* * The author of this code is Angelos D. Keromytis (angelos@cis.upenn.edu) * @@ -453,16 +453,20 @@ void crypto_freereq(struct cryptop *crp) { struct cryptodesc *crd; + int s; if (crp == NULL) return; + s = splvm(); + while ((crd = crp->crp_desc) != NULL) { crp->crp_desc = crd->crd_next; pool_put(&cryptodesc_pool, crd); } pool_put(&cryptop_pool, crp); + splx(s); } /* @@ -473,14 +477,20 @@ crypto_getreq(int num) { struct cryptodesc *crd; struct cryptop *crp; + int s; + s = splvm(); + crp = pool_get(&cryptop_pool, PR_NOWAIT | PR_ZERO); - if (crp == NULL) + if (crp == NULL) { + splx(s); return NULL; + } while (num--) { crd = pool_get(&cryptodesc_pool, PR_NOWAIT | PR_ZERO); if (crd == NULL) { + splx(s); crypto_freereq(crp); return NULL; } @@ -489,6 +499,7 @@ crypto_getreq(int num) crp->crp_desc = crd; } + splx(s); return crp; } @@ -499,10 +510,8 @@ crypto_init(void) pool_init(&cryptop_pool, sizeof(struct cryptop), 0, 0, 0, "cryptop", NULL); - pool_setipl(&cryptop_pool, IPL_VM); pool_init(&cryptodesc_pool, sizeof(struct cryptodesc), 0, 0, 0, "cryptodesc", NULL); - pool_setipl(&cryptodesc_pool, IPL_VM); } /* Index: dev/pci/if_myx.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_myx.c,v retrieving revision 1.72 diff -u -p -r1.72 if_myx.c --- dev/pci/if_myx.c 22 Dec 2014 02:28:52 -0000 1.72 +++ dev/pci/if_myx.c 7 Jan 2015 03:37:52 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: if_myx.c,v 1.72 2014/12/22 02:28:52 tedu Exp $ */ +/* $OpenBSD: if_myx.c,v 1.61 2014/07/12 18:48:51 tedu Exp $ */ /* * Copyright (c) 2007 Reyk Floeter @@ -29,11 +29,10 @@ #include #include #include -#include #include +#include #include #include -#include #include #include @@ -47,8 +46,10 @@ #include #endif +#ifdef INET #include #include +#endif #include #include @@ -206,7 +207,7 @@ void myx_write_txd_tail(struct myx_soft int myx_load_buf(struct myx_softc *, struct myx_buf *, struct mbuf *); int myx_setlladdr(struct myx_softc *, u_int32_t, u_int8_t *); int myx_intr(void *); -void myx_rxeof(struct myx_softc *); +int myx_rxeof(struct myx_softc *); void myx_txeof(struct myx_softc *, u_int32_t); struct myx_buf * myx_buf_alloc(struct myx_softc *, bus_size_t, int, @@ -922,8 +923,10 @@ myx_ioctl(struct ifnet *ifp, u_long cmd, switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; +#ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) arp_ifinit(&sc->sc_ac, ifa); +#endif /* FALLTHROUGH */ case SIOCSIFFLAGS: @@ -1507,23 +1510,27 @@ myx_start(struct ifnet *ifp) return; for (;;) { - if (sc->sc_tx_free <= sc->sc_tx_nsegs || - (mb = myx_buf_get(&sc->sc_tx_buf_free)) == NULL) { + if (sc->sc_tx_free <= sc->sc_tx_nsegs) { SET(ifp->if_flags, IFF_OACTIVE); break; } - IFQ_DEQUEUE(&ifp->if_snd, m); - if (m == NULL) { - myx_buf_put(&sc->sc_tx_buf_free, mb); + IFQ_POLL(&ifp->if_snd, m); + if (m == NULL) + break; + + mb = myx_buf_get(&sc->sc_tx_buf_free); + if (mb == NULL) { + SET(ifp->if_flags, IFF_OACTIVE); break; } + IFQ_DEQUEUE(&ifp->if_snd, m); if (myx_load_buf(sc, mb, m) != 0) { m_freem(m); myx_buf_put(&sc->sc_tx_buf_free, mb); ifp->if_oerrors++; - continue; + break; } #if 
NBPFILTER > 0 @@ -1646,7 +1653,9 @@ myx_intr(void *arg) enum myx_state state = MYX_S_RUNNING; bus_dmamap_t map = sc->sc_sts_dma.mxm_map; u_int32_t data, link = 0xffffffff; + int refill = 0; u_int8_t valid = 0; + int i; mtx_enter(&sc->sc_sts_mtx); if (sc->sc_state == MYX_S_OFF) { @@ -1693,7 +1702,7 @@ myx_intr(void *arg) data = htobe32(3); if (valid & 0x1) { - myx_rxeof(sc); + refill |= myx_rxeof(sc); bus_space_write_raw_region_4(sc->sc_memt, sc->sc_memh, sc->sc_irqclaimoff, &data, sizeof(data)); @@ -1711,17 +1720,22 @@ myx_intr(void *arg) return (1); } - if (link != 0xffffffff) { - KERNEL_LOCK(); + KERNEL_LOCK(); + if (link != 0xffffffff) myx_link_state(sc, link); - KERNEL_UNLOCK(); - } if (ISSET(ifp->if_flags, IFF_OACTIVE)) { - KERNEL_LOCK(); CLR(ifp->if_flags, IFF_OACTIVE); myx_start(ifp); - KERNEL_UNLOCK(); + } + KERNEL_UNLOCK(); + + for (i = 0; i < 2; i++) { + if (ISSET(refill, 1 << i)) { + if (myx_rx_fill(sc, i) >= 0 && + myx_bufs_empty(&sc->sc_rx_buf_list[i])) + timeout_add(&sc->sc_refill, 0); + } } return (1); @@ -1764,10 +1778,12 @@ myx_txeof(struct myx_softc *sc, u_int32_ bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize, BUS_DMASYNC_POSTWRITE); + KERNEL_LOCK(); bus_dmamap_unload(sc->sc_dmat, map); + m_freem(m); ifp->if_opackets++; + KERNEL_UNLOCK(); - m_freem(m); myx_buf_put(&sc->sc_tx_buf_free, mb); } while (++sc->sc_tx_count != done_count); @@ -1778,15 +1794,15 @@ myx_txeof(struct myx_softc *sc, u_int32_ } } -void +int myx_rxeof(struct myx_softc *sc) { static const struct myx_intrq_desc zerodesc = { 0, 0 }; struct ifnet *ifp = &sc->sc_ac.ac_if; - struct mbuf_list ml = MBUF_LIST_INITIALIZER(); struct myx_buf *mb; struct mbuf *m; int ring; + int rings = 0; u_int rxfree[2] = { 0 , 0 }; u_int len; @@ -1810,14 +1826,22 @@ myx_rxeof(struct myx_softc *sc) bus_dmamap_sync(sc->sc_dmat, mb->mb_map, 0, mb->mb_map->dm_mapsize, BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(sc->sc_dmat, mb->mb_map); m = mb->mb_m; m->m_data += ETHER_ALIGN; m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = len; - ml_enqueue(&ml, m); + KERNEL_LOCK(); + bus_dmamap_unload(sc->sc_dmat, mb->mb_map); +#if NBPFILTER > 0 + if (ifp->if_bpf) + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_IN); +#endif + + ether_input_mbuf(ifp, m); + ifp->if_ipackets++; + KERNEL_UNLOCK(); myx_buf_put(&sc->sc_rx_buf_free[ring], mb); @@ -1835,24 +1859,10 @@ myx_rxeof(struct myx_softc *sc) if_rxr_put(&sc->sc_rx_ring[ring], rxfree[ring]); mtx_leave(&sc->sc_rx_ring_lock[ring].mrl_mtx); - if (myx_rx_fill(sc, ring) >= 0 && - myx_bufs_empty(&sc->sc_rx_buf_list[ring])) - timeout_add(&sc->sc_refill, 0); - } - - ifp->if_ipackets += ml_len(&ml); - - KERNEL_LOCK(); -#if NBPFILTER > 0 - if (ifp->if_bpf) { - MBUF_LIST_FOREACH(&ml, m) - bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_IN); + SET(rings, 1 << ring); } -#endif - while ((m = ml_dequeue(&ml)) != NULL) - ether_input_mbuf(ifp, m); - KERNEL_UNLOCK(); + return (rings); } void @@ -1871,57 +1881,13 @@ myx_rx_zero(struct myx_softc *sc, int ri } } -static inline int -myx_rx_fill_slots(struct myx_softc *sc, int ring, u_int slots) +int +myx_rx_fill(struct myx_softc *sc, int ring) { struct myx_rx_desc rxd; struct myx_buf *mb, *firstmb; u_int32_t offset = sc->sc_rx_ring_offset[ring]; - u_int idx, firstidx; - - firstmb = myx_buf_fill(sc, ring); - if (firstmb == NULL) - return (slots); - - myx_buf_put(&sc->sc_rx_buf_list[ring], firstmb); - - firstidx = sc->sc_rx_ring_idx[ring]; - idx = firstidx + 1; - idx %= sc->sc_rx_ring_count; - slots--; - - while (slots > 0 && (mb = myx_buf_fill(sc, ring)) != NULL) { - 
myx_buf_put(&sc->sc_rx_buf_list[ring], mb); - - rxd.rx_addr = htobe64(mb->mb_map->dm_segs[0].ds_addr); - myx_bus_space_write(sc->sc_memt, sc->sc_memh, - offset + idx * sizeof(rxd), &rxd, sizeof(rxd)); - - idx++; - idx %= sc->sc_rx_ring_count; - slots--; - } - - /* make sure the first descriptor is seen after the others */ - if (idx != firstidx + 1) { - bus_space_barrier(sc->sc_memt, sc->sc_memh, - offset, sizeof(rxd) * sc->sc_rx_ring_count, - BUS_SPACE_BARRIER_WRITE); - } - - rxd.rx_addr = htobe64(firstmb->mb_map->dm_segs[0].ds_addr); - myx_write(sc, offset + firstidx * sizeof(rxd), - &rxd, sizeof(rxd)); - - sc->sc_rx_ring_idx[ring] = idx; - - return (slots); -} - -int -myx_rx_fill(struct myx_softc *sc, int ring) -{ - u_int slots; + u_int idx, firstidx, slots; int rv = 1; if (!myx_ring_enter(&sc->sc_rx_ring_lock[ring])) @@ -1932,12 +1898,44 @@ myx_rx_fill(struct myx_softc *sc, int ri slots = if_rxr_get(&sc->sc_rx_ring[ring], sc->sc_rx_ring_count); mtx_leave(&sc->sc_rx_ring_lock[ring].mrl_mtx); - if (slots == 0) + if (slots-- == 0) + continue; + + firstmb = myx_buf_fill(sc, ring); + if (firstmb == NULL) continue; - slots = myx_rx_fill_slots(sc, ring, slots); rv = 0; + myx_buf_put(&sc->sc_rx_buf_list[ring], firstmb); + + firstidx = sc->sc_rx_ring_idx[ring]; + idx = firstidx + 1; + idx %= sc->sc_rx_ring_count; + while (slots > 0 && (mb = myx_buf_fill(sc, ring)) != NULL) { + myx_buf_put(&sc->sc_rx_buf_list[ring], mb); + + rxd.rx_addr = htobe64(mb->mb_map->dm_segs[0].ds_addr); + myx_bus_space_write(sc->sc_memt, sc->sc_memh, + offset + idx * sizeof(rxd), &rxd, sizeof(rxd)); + + idx++; + idx %= sc->sc_rx_ring_count; + slots--; + } + + /* make sure the first descriptor is seen after the others */ + if (idx != firstidx + 1) { + bus_space_barrier(sc->sc_memt, sc->sc_memh, + offset, sizeof(rxd) * sc->sc_rx_ring_count, + BUS_SPACE_BARRIER_WRITE); + } + + rxd.rx_addr = htobe64(firstmb->mb_map->dm_segs[0].ds_addr); + myx_write(sc, offset + firstidx * sizeof(rxd), + &rxd, sizeof(rxd)); + + sc->sc_rx_ring_idx[ring] = idx; mtx_enter(&sc->sc_rx_ring_lock[ring].mrl_mtx); if_rxr_put(&sc->sc_rx_ring[ring], slots); mtx_leave(&sc->sc_rx_ring_lock[ring].mrl_mtx); @@ -1954,7 +1952,9 @@ myx_buf_fill(struct myx_softc *sc, int r struct mbuf *m; int rv; + KERNEL_LOCK(); m = MCLGETI(NULL, M_DONTWAIT, NULL, sizes[ring]); + KERNEL_UNLOCK(); if (m == NULL) return (NULL); m->m_len = m->m_pkthdr.len = sizes[ring]; @@ -1963,7 +1963,9 @@ myx_buf_fill(struct myx_softc *sc, int r if (mb == NULL) goto mfree; + KERNEL_LOCK(); rv = bus_dmamap_load_mbuf(sc->sc_dmat, mb->mb_map, m, BUS_DMA_NOWAIT); + KERNEL_UNLOCK(); if (rv != 0) goto put; @@ -1976,7 +1978,9 @@ myx_buf_fill(struct myx_softc *sc, int r put: myx_buf_put(&sc->sc_rx_buf_free[ring], mb); mfree: + KERNEL_LOCK(); m_freem(m); + KERNEL_UNLOCK(); return (NULL); } @@ -2058,16 +2062,27 @@ myx_ring_lock_init(struct myx_ring_lock int myx_ring_enter(struct myx_ring_lock *mrl) { - return (atomic_inc_int_nv(&mrl->mrl_running) == 1); + int rv = 1; + + mtx_enter(&mrl->mrl_mtx); + if (++mrl->mrl_running > 1) + rv = 0; + mtx_leave(&mrl->mrl_mtx); + + return (rv); } int myx_ring_leave(struct myx_ring_lock *mrl) { - if (atomic_cas_uint(&mrl->mrl_running, 1, 0) == 1) - return (1); + int rv = 1; - mrl->mrl_running = 1; + mtx_enter(&mrl->mrl_mtx); + if (--mrl->mrl_running > 0) { + mrl->mrl_running = 1; + rv = 0; + } + mtx_leave(&mrl->mrl_mtx); - return (0); + return (rv); } Index: kern/subr_pool.c =================================================================== RCS file: 
/cvs/src/sys/kern/subr_pool.c,v retrieving revision 1.177 diff -u -p -r1.177 subr_pool.c --- kern/subr_pool.c 5 Jan 2015 23:54:18 -0000 1.177 +++ kern/subr_pool.c 7 Jan 2015 03:37:52 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: subr_pool.c,v 1.177 2015/01/05 23:54:18 dlg Exp $ */ +/* $OpenBSD: subr_pool.c,v 1.138 2014/07/10 13:34:39 tedu Exp $ */ /* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */ /*- @@ -33,15 +33,16 @@ #include #include +#include #include #include #include #include #include -#include #include #include +#include /* * Pool resource management utility. @@ -59,39 +60,28 @@ /* List of all pools */ SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head); -/* - * Every pool gets a unique serial number assigned to it. If this counter - * wraps, we're screwed, but we shouldn't create so many pools anyway. - */ -unsigned int pool_serial; -unsigned int pool_count; - -/* Lock the previous variables making up the global pool state */ -struct rwlock pool_lock = RWLOCK_INITIALIZER("pools"); - /* Private pool for page header structures */ struct pool phpool; struct pool_item_header { /* Page headers */ - TAILQ_ENTRY(pool_item_header) + LIST_ENTRY(pool_item_header) ph_pagelist; /* pool page list */ XSIMPLEQ_HEAD(,pool_item) ph_itemlist; /* chunk list for this page */ RB_ENTRY(pool_item_header) ph_node; /* Off-page page headers */ int ph_nmissing; /* # of chunks in use */ caddr_t ph_page; /* this page's address */ - u_long ph_magic; - int ph_tick; + caddr_t ph_colored; /* page's colored address */ + int ph_pagesize; + int ph_magic; }; -#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */ -#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT) struct pool_item { - u_long pi_magic; + u_int32_t pi_magic; + /* Other entries use only this list entry */ XSIMPLEQ_ENTRY(pool_item) pi_list; }; -#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic) #ifdef POOL_DEBUG int pool_debug = 1; @@ -102,54 +92,43 @@ int pool_debug = 0; #define POOL_NEEDS_CATCHUP(pp) \ ((pp)->pr_nitems < (pp)->pr_minitems) -#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0) - -struct pool_item_header * - pool_p_alloc(struct pool *, int, int *); -void pool_p_insert(struct pool *, struct pool_item_header *); -void pool_p_remove(struct pool *, struct pool_item_header *); -void pool_p_free(struct pool *, struct pool_item_header *); +/* + * Every pool gets a unique serial number assigned to it. If this counter + * wraps, we're screwed, but we shouldn't create so many pools anyway. + */ +unsigned int pool_serial; +int pool_catchup(struct pool *); +void pool_prime_page(struct pool *, caddr_t, struct pool_item_header *); void pool_update_curpage(struct pool *); -void *pool_do_get(struct pool *, int, int *); +void pool_swizzle_curpage(struct pool *); +void *pool_do_get(struct pool *, int); +void pool_do_put(struct pool *, void *); +void pr_rmpage(struct pool *, struct pool_item_header *, + struct pool_pagelist *); int pool_chk_page(struct pool *, struct pool_item_header *, int); int pool_chk(struct pool *); -void pool_get_done(void *, void *); -void pool_runqueue(struct pool *, int); +struct pool_item_header *pool_alloc_item_header(struct pool *, caddr_t , int); void *pool_allocator_alloc(struct pool *, int, int *); void pool_allocator_free(struct pool *, void *); /* - * The default pool allocator. 
- */ -void *pool_page_alloc(struct pool *, int, int *); -void pool_page_free(struct pool *, void *); - -/* - * safe for interrupts, name preserved for compat this is the default - * allocator + * XXX - quick hack. For pools with large items we want to use a special + * allocator. For now, instead of having the allocator figure out + * the allocation size from the pool (which can be done trivially + * with round_page(pr_itemsperpage * pr_size)) which would require + * lots of changes everywhere, we just create allocators for each + * size. We limit those to 128 pages. */ -struct pool_allocator pool_allocator_nointr = { - pool_page_alloc, - pool_page_free -}; - +#define POOL_LARGE_MAXPAGES 128 +struct pool_allocator pool_allocator_large[POOL_LARGE_MAXPAGES]; +struct pool_allocator pool_allocator_large_ni[POOL_LARGE_MAXPAGES]; void *pool_large_alloc(struct pool *, int, int *); void pool_large_free(struct pool *, void *); - -struct pool_allocator pool_allocator_large = { - pool_large_alloc, - pool_large_free -}; - void *pool_large_alloc_ni(struct pool *, int, int *); void pool_large_free_ni(struct pool *, void *); -struct pool_allocator pool_allocator_large_ni = { - pool_large_alloc_ni, - pool_large_free_ni -}; #ifdef DDB void pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...) @@ -160,19 +139,16 @@ void pool_print1(struct pool *, const c #define pool_sleep(pl) msleep(pl, &pl->pr_mtx, PSWP, pl->pr_wchan, 0) -static inline int +static __inline int phtree_compare(struct pool_item_header *a, struct pool_item_header *b) { - vaddr_t va = (vaddr_t)a->ph_page; - vaddr_t vb = (vaddr_t)b->ph_page; - - /* the compares in this order are important for the NFIND to work */ - if (vb < va) - return (-1); - if (vb > va) - return (1); - - return (0); + long diff = (vaddr_t)a->ph_page - (vaddr_t)b->ph_page; + if (diff < 0) + return -(-diff >= a->ph_pagesize); + else if (diff > 0) + return (diff >= b->ph_pagesize); + else + return (0); } RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare); @@ -181,29 +157,81 @@ RB_GENERATE(phtree, pool_item_header, ph /* * Return the pool page header based on page address. */ -static inline struct pool_item_header * +static __inline struct pool_item_header * pr_find_pagehead(struct pool *pp, void *v) { - struct pool_item_header *ph, key; + struct pool_item_header *ph, tmp; - if (POOL_INPGHDR(pp)) { + if ((pp->pr_roflags & PR_PHINPAGE) != 0) { caddr_t page; - page = (caddr_t)((vaddr_t)v & pp->pr_pgmask); + page = (caddr_t)((vaddr_t)v & pp->pr_alloc->pa_pagemask); return ((struct pool_item_header *)(page + pp->pr_phoffset)); } - key.ph_page = v; - ph = RB_NFIND(phtree, &pp->pr_phtree, &key); - if (ph == NULL) - panic("%s: %s: page header missing", __func__, pp->pr_wchan); - - KASSERT(ph->ph_page <= (caddr_t)v); - if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v) - panic("%s: %s: incorrect page", __func__, pp->pr_wchan); + /* + * The trick we're using in the tree compare function is to compare + * two elements equal when they overlap. We want to return the + * page header that belongs to the element just before this address. + * We don't want this element to compare equal to the next element, + * so the compare function takes the pagesize from the lower element. + * If this header is the lower, its pagesize is zero, so it can't + * overlap with the next header. But if the header we're looking for + * is lower, we'll use its pagesize and it will overlap and return + * equal. 
+ */ + tmp.ph_page = v; + tmp.ph_pagesize = 0; + ph = RB_FIND(phtree, &pp->pr_phtree, &tmp); + + if (ph) { + KASSERT(ph->ph_page <= (caddr_t)v); + KASSERT(ph->ph_page + ph->ph_pagesize > (caddr_t)v); + } + return ph; +} - return (ph); +/* + * Remove a page from the pool. + */ +void +pr_rmpage(struct pool *pp, struct pool_item_header *ph, + struct pool_pagelist *pq) +{ + + /* + * If the page was idle, decrement the idle page count. + */ + if (ph->ph_nmissing == 0) { +#ifdef DIAGNOSTIC + if (pp->pr_nidle == 0) + panic("pr_rmpage: nidle inconsistent"); + if (pp->pr_nitems < pp->pr_itemsperpage) + panic("pr_rmpage: nitems inconsistent"); +#endif + pp->pr_nidle--; + } + + pp->pr_nitems -= pp->pr_itemsperpage; + + /* + * Unlink a page from the pool and release it (or queue it for release). + */ + LIST_REMOVE(ph, ph_pagelist); + if ((pp->pr_roflags & PR_PHINPAGE) == 0) + RB_REMOVE(phtree, &pp->pr_phtree, ph); + pp->pr_npages--; + pp->pr_npagefree++; + pool_update_curpage(pp); + + if (pq) { + LIST_INSERT_HEAD(pq, ph, ph_pagelist); + } else { + pool_allocator_free(pp, ph->ph_page); + if ((pp->pr_roflags & PR_PHINPAGE) == 0) + pool_put(&phpool, ph); + } } /* @@ -216,12 +244,62 @@ void pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags, const char *wchan, struct pool_allocator *palloc) { - int off = 0; - unsigned int pgsize = PAGE_SIZE, items; + int off, slack; #ifdef DIAGNOSTIC struct pool *iter; - KASSERT(ioff == 0); + + SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) { + if (iter == pp) + panic("init pool already on list"); + } +#endif + +#ifdef MALLOC_DEBUG + if ((flags & PR_DEBUG) && (ioff != 0 || align != 0)) + flags &= ~PR_DEBUG; #endif + /* + * Check arguments and construct default values. + */ + if (palloc == NULL) { + if (size > PAGE_SIZE) { + int psize; + + /* + * XXX - should take align into account as well. + */ + if (size == round_page(size)) + psize = size / PAGE_SIZE; + else + psize = PAGE_SIZE / roundup(size % PAGE_SIZE, + 1024); + if (psize > POOL_LARGE_MAXPAGES) + psize = POOL_LARGE_MAXPAGES; + if (flags & PR_WAITOK) + palloc = &pool_allocator_large_ni[psize-1]; + else + palloc = &pool_allocator_large[psize-1]; + if (palloc->pa_pagesz == 0) { + palloc->pa_pagesz = psize * PAGE_SIZE; + if (flags & PR_WAITOK) { + palloc->pa_alloc = pool_large_alloc_ni; + palloc->pa_free = pool_large_free_ni; + } else { + palloc->pa_alloc = pool_large_alloc; + palloc->pa_free = pool_large_free; + } + } + } else { + palloc = &pool_allocator_nointr; + } + } + if (palloc->pa_pagesz == 0) { + palloc->pa_pagesz = PAGE_SIZE; + } + if (palloc->pa_pagemask == 0) { + palloc->pa_pagemask = ~(palloc->pa_pagesz - 1); + palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1; + } if (align == 0) align = ALIGN(1); @@ -230,53 +308,27 @@ pool_init(struct pool *pp, size_t size, size = sizeof(struct pool_item); size = roundup(size, align); - - if (palloc == NULL) { - while (size > pgsize) - pgsize <<= 1; - - if (pgsize > PAGE_SIZE) { - palloc = ISSET(flags, PR_WAITOK) ? - &pool_allocator_large_ni : &pool_allocator_large; - } else - palloc = &pool_allocator_nointr; - } else - pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE; - - items = pgsize / size; - - /* - * Decide whether to put the page header off page to avoid - * wasting too large a part of the page. Off-page page headers - * go into an RB tree, so we can match a returned item with - * its header based on the page address. 
- */ - if (pgsize - (size * items) > sizeof(struct pool_item_header)) { - off = pgsize - sizeof(struct pool_item_header); - } else if (sizeof(struct pool_item_header) * 2 >= size) { - off = pgsize - sizeof(struct pool_item_header); - items = off / size; - } - - KASSERT(items > 0); +#ifdef DIAGNOSTIC + if (size > palloc->pa_pagesz) + panic("pool_init: pool item size (%lu) too large", + (u_long)size); +#endif /* * Initialize the pool structure. */ - memset(pp, 0, sizeof(*pp)); - TAILQ_INIT(&pp->pr_emptypages); - TAILQ_INIT(&pp->pr_fullpages); - TAILQ_INIT(&pp->pr_partpages); + LIST_INIT(&pp->pr_emptypages); + LIST_INIT(&pp->pr_fullpages); + LIST_INIT(&pp->pr_partpages); pp->pr_curpage = NULL; pp->pr_npages = 0; pp->pr_minitems = 0; pp->pr_minpages = 0; pp->pr_maxpages = 8; + pp->pr_roflags = flags; + pp->pr_flags = 0; pp->pr_size = size; - pp->pr_pgsize = pgsize; - pp->pr_pgmask = ~0UL ^ (pgsize - 1); - pp->pr_phoffset = off; - pp->pr_itemsperpage = items; + pp->pr_align = align; pp->pr_wchan = wchan; pp->pr_alloc = palloc; pp->pr_nitems = 0; @@ -287,7 +339,47 @@ pool_init(struct pool *pp, size_t size, pp->pr_hardlimit_ratecap.tv_usec = 0; pp->pr_hardlimit_warning_last.tv_sec = 0; pp->pr_hardlimit_warning_last.tv_usec = 0; - RB_INIT(&pp->pr_phtree); + pp->pr_serial = ++pool_serial; + if (pool_serial == 0) + panic("pool_init: too much uptime"); + + /* + * Decide whether to put the page header off page to avoid + * wasting too large a part of the page. Off-page page headers + * go into an RB tree, so we can match a returned item with + * its header based on the page address. + * We use 1/16 of the page size as the threshold (XXX: tune) + */ + if (pp->pr_size < palloc->pa_pagesz/16 && pp->pr_size < PAGE_SIZE) { + /* Use the end of the page for the page header */ + pp->pr_roflags |= PR_PHINPAGE; + pp->pr_phoffset = off = palloc->pa_pagesz - + ALIGN(sizeof(struct pool_item_header)); + } else { + /* The page header will be taken from our page header pool */ + pp->pr_phoffset = 0; + off = palloc->pa_pagesz; + RB_INIT(&pp->pr_phtree); + } + + /* + * Alignment is to take place at `ioff' within the item. This means + * we must reserve up to `align - 1' bytes on the page to allow + * appropriate positioning of each item. + * + * Silently enforce `0 <= ioff < align'. + */ + pp->pr_itemoffset = ioff = ioff % align; + pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size; + KASSERT(pp->pr_itemsperpage != 0); + + /* + * Use the slack between the chunks and the page header + * for "cache coloring". + */ + slack = off - pp->pr_itemsperpage * pp->pr_size; + pp->pr_maxcolor = (slack / align) * align; + pp->pr_curcolor = 0; pp->pr_nget = 0; pp->pr_nfail = 0; @@ -299,37 +391,18 @@ pool_init(struct pool *pp, size_t size, pp->pr_ipl = -1; mtx_init(&pp->pr_mtx, IPL_NONE); - mtx_init(&pp->pr_requests_mtx, IPL_NONE); - TAILQ_INIT(&pp->pr_requests); if (phpool.pr_size == 0) { pool_init(&phpool, sizeof(struct pool_item_header), 0, 0, 0, "phpool", NULL); pool_setipl(&phpool, IPL_HIGH); - - /* make sure phpool wont "recurse" */ - KASSERT(POOL_INPGHDR(&phpool)); } /* pglistalloc/constraint parameters */ pp->pr_crange = &kp_dirty; /* Insert this into the list of all pools. 
*/ - rw_enter_write(&pool_lock); -#ifdef DIAGNOSTIC - SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) { - if (iter == pp) - panic("%s: pool %s already on list", __func__, wchan); - } -#endif - - pp->pr_serial = ++pool_serial; - if (pool_serial == 0) - panic("%s: too much uptime", __func__); - SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist); - pool_count++; - rw_exit_write(&pool_lock); } void @@ -337,7 +410,6 @@ pool_setipl(struct pool *pp, int ipl) { pp->pr_ipl = ipl; mtx_init(&pp->pr_mtx, ipl); - mtx_init(&pp->pr_requests_mtx, ipl); } /* @@ -349,14 +421,7 @@ pool_destroy(struct pool *pp) struct pool_item_header *ph; struct pool *prev, *iter; -#ifdef DIAGNOSTIC - if (pp->pr_nout != 0) - panic("%s: pool busy: still out: %u", __func__, pp->pr_nout); -#endif - /* Remove from global pool list */ - rw_enter_write(&pool_lock); - pool_count--; if (pp == SIMPLEQ_FIRST(&pool_head)) SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist); else { @@ -365,334 +430,399 @@ pool_destroy(struct pool *pp) if (iter == pp) { SIMPLEQ_REMOVE_AFTER(&pool_head, prev, pr_poollist); - break; + goto removed; } prev = iter; } +#ifdef DIAGNOSTIC + panic("destroyed pool not on list"); +#endif } - rw_exit_write(&pool_lock); +removed: +#ifdef DIAGNOSTIC + if (pp->pr_nout != 0) + panic("pool_destroy: pool busy: still out: %u", pp->pr_nout); +#endif /* Remove all pages */ - while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) { - mtx_enter(&pp->pr_mtx); - pool_p_remove(pp, ph); - mtx_leave(&pp->pr_mtx); - pool_p_free(pp, ph); - } - KASSERT(TAILQ_EMPTY(&pp->pr_fullpages)); - KASSERT(TAILQ_EMPTY(&pp->pr_partpages)); -} + while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) + pr_rmpage(pp, ph, NULL); + KASSERT(LIST_EMPTY(&pp->pr_fullpages)); + KASSERT(LIST_EMPTY(&pp->pr_partpages)); -void -pool_request_init(struct pool_request *pr, - void (*handler)(void *, void *), void *cookie) -{ - pr->pr_handler = handler; - pr->pr_cookie = cookie; - pr->pr_item = NULL; } -void -pool_request(struct pool *pp, struct pool_request *pr) +struct pool_item_header * +pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags) { - mtx_enter(&pp->pr_requests_mtx); - TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry); - pool_runqueue(pp, PR_NOWAIT); - mtx_leave(&pp->pr_requests_mtx); -} + struct pool_item_header *ph; -struct pool_get_memory { - struct mutex mtx; - void * volatile v; -}; + if ((pp->pr_roflags & PR_PHINPAGE) != 0) + ph = (struct pool_item_header *)(storage + pp->pr_phoffset); + else + ph = pool_get(&phpool, (flags & ~(PR_WAITOK | PR_ZERO)) | + PR_NOWAIT); +#ifdef DIAGNOSTIC + if (pool_debug && ph != NULL) + ph->ph_magic = poison_value(ph); +#endif + return (ph); +} /* - * Grab an item from the pool. 
+ * Grab an item from the pool; must be called at appropriate spl level */ void * pool_get(struct pool *pp, int flags) { - void *v = NULL; - int slowdown = 0; + void *v; KASSERT(flags & (PR_WAITOK | PR_NOWAIT)); + if ((flags & PR_WAITOK) != 0) { +#ifdef DIAGNOSTIC + assertwaitok(); + if (pool_debug == 2) + yield(); +#endif + if (!cold && pool_debug) { + KERNEL_UNLOCK(); + KERNEL_LOCK(); + } + } mtx_enter(&pp->pr_mtx); - if (pp->pr_nout >= pp->pr_hardlimit) { - if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL)) - goto fail; - } else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) { - if (ISSET(flags, PR_NOWAIT)) - goto fail; +#ifdef POOL_DEBUG + if (pp->pr_roflags & PR_DEBUGCHK) { + if (pool_chk(pp)) + panic("before pool_get"); } - mtx_leave(&pp->pr_mtx); - - if (slowdown && ISSET(flags, PR_WAITOK)) - yield(); - - if (v == NULL) { - struct pool_get_memory mem = { - MUTEX_INITIALIZER((pp->pr_ipl == -1) ? - IPL_NONE : pp->pr_ipl), NULL }; - struct pool_request pr; - - pool_request_init(&pr, pool_get_done, &mem); - pool_request(pp, &pr); - - mtx_enter(&mem.mtx); - while (mem.v == NULL) - msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0); - mtx_leave(&mem.mtx); - - v = mem.v; +#endif + v = pool_do_get(pp, flags); +#ifdef POOL_DEBUG + if (pp->pr_roflags & PR_DEBUGCHK) { + if (pool_chk(pp)) + panic("after pool_get"); } +#endif + if (v != NULL) + pp->pr_nget++; + mtx_leave(&pp->pr_mtx); + if (v == NULL) + return (v); - if (ISSET(flags, PR_ZERO)) + if (flags & PR_ZERO) memset(v, 0, pp->pr_size); return (v); - -fail: - pp->pr_nfail++; - mtx_leave(&pp->pr_mtx); - return (NULL); } -void -pool_get_done(void *xmem, void *v) -{ - struct pool_get_memory *mem = xmem; - - mtx_enter(&mem->mtx); - mem->v = v; - mtx_leave(&mem->mtx); - - wakeup_one(mem); -} - -void -pool_runqueue(struct pool *pp, int flags) +void * +pool_do_get(struct pool *pp, int flags) { - struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl); - struct pool_request *pr; - - MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx); - MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx); - - if (pp->pr_requesting++) - return; - - do { - pp->pr_requesting = 1; - - /* no TAILQ_JOIN? :( */ - while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) { - TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry); - TAILQ_INSERT_TAIL(&prl, pr, pr_entry); - } - if (TAILQ_EMPTY(&prl)) - continue; - - mtx_leave(&pp->pr_requests_mtx); - - mtx_enter(&pp->pr_mtx); - pr = TAILQ_FIRST(&prl); - while (pr != NULL) { - int slowdown = 0; - - if (pp->pr_nout >= pp->pr_hardlimit) - break; - - pr->pr_item = pool_do_get(pp, flags, &slowdown); - if (pr->pr_item == NULL) /* || slowdown ? */ - break; + struct pool_item *pi; + struct pool_item_header *ph; + void *v; + int slowdown = 0; - pr = TAILQ_NEXT(pr, pr_entry); - } - mtx_leave(&pp->pr_mtx); +#ifdef MALLOC_DEBUG + if (pp->pr_roflags & PR_DEBUG) { + void *addr; + + addr = NULL; + debug_malloc(pp->pr_size, M_DEBUG, + (flags & PR_WAITOK) ? M_WAITOK : M_NOWAIT, &addr); + return (addr); + } +#endif - while ((pr = TAILQ_FIRST(&prl)) != NULL && - pr->pr_item != NULL) { - TAILQ_REMOVE(&prl, pr, pr_entry); - (*pr->pr_handler)(pr->pr_cookie, pr->pr_item); +startover: + /* + * Check to see if we've reached the hard limit. If we have, + * and we can wait, then wait until an item has been returned to + * the pool. 
+ */ +#ifdef DIAGNOSTIC + if (pp->pr_nout > pp->pr_hardlimit) + panic("pool_do_get: %s: crossed hard limit", pp->pr_wchan); +#endif + if (pp->pr_nout == pp->pr_hardlimit) { + if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) { + /* + * XXX: A warning isn't logged in this case. Should + * it be? + */ + pp->pr_flags |= PR_WANTED; + pool_sleep(pp); + goto startover; } - mtx_enter(&pp->pr_requests_mtx); - } while (--pp->pr_requesting); + /* + * Log a message that the hard limit has been hit. + */ + if (pp->pr_hardlimit_warning != NULL && + ratecheck(&pp->pr_hardlimit_warning_last, + &pp->pr_hardlimit_ratecap)) + log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning); - /* no TAILQ_JOIN :( */ - while ((pr = TAILQ_FIRST(&prl)) != NULL) { - TAILQ_REMOVE(&prl, pr, pr_entry); - TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry); + pp->pr_nfail++; + return (NULL); } -} - -void * -pool_do_get(struct pool *pp, int flags, int *slowdown) -{ - struct pool_item *pi; - struct pool_item_header *ph; - - MUTEX_ASSERT_LOCKED(&pp->pr_mtx); - - if (pp->pr_ipl != -1) - splassert(pp->pr_ipl); + pool_swizzle_curpage(pp); /* - * Account for this item now to avoid races if we need to give up - * pr_mtx to allocate a page. + * The convention we use is that if `curpage' is not NULL, then + * it points at a non-empty bucket. In particular, `curpage' + * never points at a page header which has PR_PHINPAGE set and + * has no items in its bucket. */ - pp->pr_nout++; + if ((ph = pp->pr_curpage) == NULL) { +#ifdef DIAGNOSTIC + if (pp->pr_nitems != 0) { + printf("pool_do_get: %s: curpage NULL, nitems %u\n", + pp->pr_wchan, pp->pr_nitems); + panic("pool_do_get: nitems inconsistent"); + } +#endif - if (pp->pr_curpage == NULL) { - mtx_leave(&pp->pr_mtx); - ph = pool_p_alloc(pp, flags, slowdown); - mtx_enter(&pp->pr_mtx); + /* + * Call the back-end page allocator for more memory. + */ + v = pool_allocator_alloc(pp, flags, &slowdown); + if (v != NULL) + ph = pool_alloc_item_header(pp, v, flags); + + if (v == NULL || ph == NULL) { + if (v != NULL) + pool_allocator_free(pp, v); + + if ((flags & PR_WAITOK) == 0) { + pp->pr_nfail++; + return (NULL); + } - if (ph == NULL) { - pp->pr_nout--; - return (NULL); + /* + * Wait for items to be returned to this pool. + * + * XXX: maybe we should wake up once a second and + * try again? + */ + pp->pr_flags |= PR_WANTED; + pool_sleep(pp); + goto startover; } - pool_p_insert(pp, ph); - } + /* We have more memory; add it to the pool */ + pool_prime_page(pp, v, ph); + pp->pr_npagealloc++; - ph = pp->pr_curpage; - pi = XSIMPLEQ_FIRST(&ph->ph_itemlist); - if (__predict_false(pi == NULL)) - panic("%s: %s: page empty", __func__, pp->pr_wchan); - - if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) { - panic("%s: %s free list modified: " - "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx", - __func__, pp->pr_wchan, ph->ph_page, pi, - 0, pi->pi_magic, POOL_IMAGIC(ph, pi)); - } + if (slowdown && (flags & PR_WAITOK)) { + mtx_leave(&pp->pr_mtx); + yield(); + mtx_enter(&pp->pr_mtx); + } - XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list); + /* Start the allocation process over. 
*/ + goto startover; + } + if ((v = pi = XSIMPLEQ_FIRST(&ph->ph_itemlist)) == NULL) { + panic("pool_do_get: %s: page empty", pp->pr_wchan); + } +#ifdef DIAGNOSTIC + if (pp->pr_nitems == 0) { + printf("pool_do_get: %s: items on itemlist, nitems %u\n", + pp->pr_wchan, pp->pr_nitems); + panic("pool_do_get: nitems inconsistent"); + } +#endif #ifdef DIAGNOSTIC - if (pool_debug && POOL_PHPOISON(ph)) { + if (pi->pi_magic != poison_value(pi)) + panic("pool_do_get(%s): free list modified: " + "page %p; item addr %p; offset 0x%x=0x%x", + pp->pr_wchan, ph->ph_page, pi, 0, pi->pi_magic); + if (pool_debug && ph->ph_magic) { size_t pidx; uint32_t pval; if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), &pidx, &pval)) { int *ip = (int *)(pi + 1); - panic("%s: %s free list modified: " + panic("pool_do_get(%s): free list modified: " "page %p; item addr %p; offset 0x%zx=0x%x", - __func__, pp->pr_wchan, ph->ph_page, pi, + pp->pr_wchan, ph->ph_page, pi, pidx * sizeof(int), ip[pidx]); } } #endif /* DIAGNOSTIC */ - if (ph->ph_nmissing++ == 0) { + /* + * Remove from item list. + */ + XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list); + pp->pr_nitems--; + pp->pr_nout++; + if (ph->ph_nmissing == 0) { +#ifdef DIAGNOSTIC + if (pp->pr_nidle == 0) + panic("pool_do_get: nidle inconsistent"); +#endif + pp->pr_nidle--; + /* * This page was previously empty. Move it to the list of * partially-full pages. This page is already curpage. */ - TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist); - TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist); - - pp->pr_nidle--; + LIST_REMOVE(ph, ph_pagelist); + LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); } - - if (ph->ph_nmissing == pp->pr_itemsperpage) { + ph->ph_nmissing++; + if (XSIMPLEQ_EMPTY(&ph->ph_itemlist)) { +#ifdef DIAGNOSTIC + if (ph->ph_nmissing != pp->pr_itemsperpage) { + panic("pool_do_get: %s: nmissing inconsistent", + pp->pr_wchan); + } +#endif /* * This page is now full. Move it to the full list * and select a new current page. */ - TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist); - TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_pagelist); + LIST_REMOVE(ph, ph_pagelist); + LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist); pool_update_curpage(pp); } - pp->pr_nget++; - - return (pi); + /* + * If we have a low water mark and we are now below that low + * water mark, add more items to the pool. + */ + if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { + /* + * XXX: Should we log a warning? Should we set up a timeout + * to try again in a second or so? The latter could break + * a caller's assumptions about interrupt protection, etc. + */ + } + return (v); } /* - * Return resource to the pool. + * Return resource to the pool; must be called at appropriate spl level */ void pool_put(struct pool *pp, void *v) { + mtx_enter(&pp->pr_mtx); +#ifdef POOL_DEBUG + if (pp->pr_roflags & PR_DEBUGCHK) { + if (pool_chk(pp)) + panic("before pool_put"); + } +#endif + pool_do_put(pp, v); +#ifdef POOL_DEBUG + if (pp->pr_roflags & PR_DEBUGCHK) { + if (pool_chk(pp)) + panic("after pool_put"); + } +#endif + pp->pr_nput++; + mtx_leave(&pp->pr_mtx); +} + +/* + * Internal version of pool_put(). 
+ */ +void +pool_do_put(struct pool *pp, void *v) +{ struct pool_item *pi = v; - struct pool_item_header *ph, *freeph = NULL; - extern int ticks; + struct pool_item_header *ph; -#ifdef DIAGNOSTIC if (v == NULL) - panic("%s: NULL item", __func__); -#endif + panic("pool_put of NULL"); - mtx_enter(&pp->pr_mtx); +#ifdef MALLOC_DEBUG + if (pp->pr_roflags & PR_DEBUG) { + debug_free(v, M_DEBUG); + return; + } +#endif +#ifdef DIAGNOSTIC if (pp->pr_ipl != -1) splassert(pp->pr_ipl); - ph = pr_find_pagehead(pp, v); + if (pp->pr_nout == 0) { + printf("pool %s: putting with none out\n", + pp->pr_wchan); + panic("pool_do_put"); + } +#endif + if ((ph = pr_find_pagehead(pp, v)) == NULL) { + panic("pool_do_put: %s: page header missing", pp->pr_wchan); + } + + /* + * Return to item list. + */ #ifdef DIAGNOSTIC if (pool_debug) { struct pool_item *qi; - XSIMPLEQ_FOREACH(qi, &ph->ph_itemlist, pi_list) { - if (pi == qi) { - panic("%s: %s: double pool_put: %p", __func__, - pp->pr_wchan, pi); - } - } + XSIMPLEQ_FOREACH(qi, &ph->ph_itemlist, pi_list) + if (pi == qi) + panic("double pool_put: %p", pi); + } + pi->pi_magic = poison_value(pi); + if (ph->ph_magic) { + poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); } #endif /* DIAGNOSTIC */ - pi->pi_magic = POOL_IMAGIC(ph, pi); XSIMPLEQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); -#ifdef DIAGNOSTIC - if (POOL_PHPOISON(ph)) - poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); -#endif /* DIAGNOSTIC */ + ph->ph_nmissing--; + pp->pr_nitems++; + pp->pr_nout--; - if (ph->ph_nmissing-- == pp->pr_itemsperpage) { - /* - * The page was previously completely full, move it to the - * partially-full list. - */ - TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_pagelist); - TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist); + /* Cancel "pool empty" condition if it exists */ + if (pp->pr_curpage == NULL) + pp->pr_curpage = ph; + + if (pp->pr_flags & PR_WANTED) { + pp->pr_flags &= ~PR_WANTED; + wakeup(pp); } + /* + * If this page is now empty, do one of two things: + * + * (1) If we have more pages than the page high water mark, + * free the page back to the system. + * + * (2) Otherwise, move the page to the empty page list. + * + * Either way, select a new current page (so we use a partially-full + * page if one is available). + */ if (ph->ph_nmissing == 0) { - /* - * The page is now empty, so move it to the empty page list. - */ pp->pr_nidle++; - - ph->ph_tick = ticks; - TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist); - TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist); - pool_update_curpage(pp); + if (pp->pr_nidle > pp->pr_maxpages) { + pr_rmpage(pp, ph, NULL); + } else { + LIST_REMOVE(ph, ph_pagelist); + LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); + pool_update_curpage(pp); + } } - - pp->pr_nout--; - pp->pr_nput++; - - /* is it time to free a page? */ - if (pp->pr_nidle > pp->pr_maxpages && - (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL && - (ticks - ph->ph_tick) > hz) { - freeph = ph; - pool_p_remove(pp, freeph); + /* + * If the page was previously completely full, move it to the + * partially-full list. 
+ */ + else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) { + LIST_REMOVE(ph, ph_pagelist); + LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); } - mtx_leave(&pp->pr_mtx); - - if (freeph != NULL) - pool_p_free(pp, freeph); - - mtx_enter(&pp->pr_requests_mtx); - pool_runqueue(pp, PR_NOWAIT); - mtx_leave(&pp->pr_requests_mtx); } /* @@ -701,193 +831,200 @@ pool_put(struct pool *pp, void *v) int pool_prime(struct pool *pp, int n) { - struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl); struct pool_item_header *ph; + caddr_t cp; int newpages; + int slowdown; + mtx_enter(&pp->pr_mtx); newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; while (newpages-- > 0) { - int slowdown = 0; - - ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown); - if (ph == NULL) /* or slowdown? */ + cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown); + if (cp != NULL) + ph = pool_alloc_item_header(pp, cp, PR_NOWAIT); + if (cp == NULL || ph == NULL) { + if (cp != NULL) + pool_allocator_free(pp, cp); break; + } - TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist); + pool_prime_page(pp, cp, ph); + pp->pr_npagealloc++; + pp->pr_minpages++; } - mtx_enter(&pp->pr_mtx); - while ((ph = TAILQ_FIRST(&pl)) != NULL) { - TAILQ_REMOVE(&pl, ph, ph_pagelist); - pool_p_insert(pp, ph); - } - mtx_leave(&pp->pr_mtx); + if (pp->pr_minpages >= pp->pr_maxpages) + pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */ + mtx_leave(&pp->pr_mtx); return (0); } -struct pool_item_header * -pool_p_alloc(struct pool *pp, int flags, int *slowdown) +/* + * Add a page worth of items to the pool. + * + * Note, we must be called with the pool descriptor LOCKED. + */ +void +pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph) { - struct pool_item_header *ph; struct pool_item *pi; - caddr_t addr; - int n; - - MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx); - KASSERT(pp->pr_size >= sizeof(*pi)); - - addr = pool_allocator_alloc(pp, flags, slowdown); - if (addr == NULL) - return (NULL); - - if (POOL_INPGHDR(pp)) - ph = (struct pool_item_header *)(addr + pp->pr_phoffset); - else { - ph = pool_get(&phpool, flags); - if (ph == NULL) { - pool_allocator_free(pp, addr); - return (NULL); - } - } + caddr_t cp = storage; + unsigned int align = pp->pr_align; + unsigned int ioff = pp->pr_itemoffset; + int n; + /* + * Insert page header. + */ + LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); XSIMPLEQ_INIT(&ph->ph_itemlist); - ph->ph_page = addr; + ph->ph_page = storage; + ph->ph_pagesize = pp->pr_alloc->pa_pagesz; ph->ph_nmissing = 0; - arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic)); -#ifdef DIAGNOSTIC - /* use a bit in ph_magic to record if we poison page items */ - if (pool_debug) - SET(ph->ph_magic, POOL_MAGICBIT); - else - CLR(ph->ph_magic, POOL_MAGICBIT); -#endif /* DIAGNOSTIC */ + if ((pp->pr_roflags & PR_PHINPAGE) == 0) + RB_INSERT(phtree, &pp->pr_phtree, ph); - n = pp->pr_itemsperpage; - while (n--) { - pi = (struct pool_item *)addr; - pi->pi_magic = POOL_IMAGIC(ph, pi); - XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list); + pp->pr_nidle++; -#ifdef DIAGNOSTIC - if (POOL_PHPOISON(ph)) - poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); -#endif /* DIAGNOSTIC */ + /* + * Color this page. + */ + cp = (caddr_t)(cp + pp->pr_curcolor); + if ((pp->pr_curcolor += align) > pp->pr_maxcolor) + pp->pr_curcolor = 0; - addr += pp->pr_size; - } + /* + * Adjust storage to apply alignment to `pr_itemoffset' in each item. 
+ */ + if (ioff != 0) + cp = (caddr_t)(cp + (align - ioff)); + ph->ph_colored = cp; - return (ph); -} + /* + * Insert remaining chunks on the bucket list. + */ + n = pp->pr_itemsperpage; + pp->pr_nitems += n; -void -pool_p_free(struct pool *pp, struct pool_item_header *ph) -{ - struct pool_item *pi; + while (n--) { + pi = (struct pool_item *)cp; - MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx); - KASSERT(ph->ph_nmissing == 0); + KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0); - XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) { - if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) { - panic("%s: %s free list modified: " - "page %p; item addr %p; offset 0x%x=0x%lx", - __func__, pp->pr_wchan, ph->ph_page, pi, - 0, pi->pi_magic); - } + /* Insert on page list */ + XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list); #ifdef DIAGNOSTIC - if (POOL_PHPOISON(ph)) { - size_t pidx; - uint32_t pval; - if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), - &pidx, &pval)) { - int *ip = (int *)(pi + 1); - panic("%s: %s free list modified: " - "page %p; item addr %p; offset 0x%zx=0x%x", - __func__, pp->pr_wchan, ph->ph_page, pi, - pidx * sizeof(int), ip[pidx]); - } + pi->pi_magic = poison_value(pi); + if (ph->ph_magic) { + poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); } -#endif - } - - pool_allocator_free(pp, ph->ph_page); - - if (!POOL_INPGHDR(pp)) - pool_put(&phpool, ph); -} - -void -pool_p_insert(struct pool *pp, struct pool_item_header *ph) -{ - MUTEX_ASSERT_LOCKED(&pp->pr_mtx); +#endif /* DIAGNOSTIC */ + cp = (caddr_t)(cp + pp->pr_size); + } - /* If the pool was depleted, point at the new page */ + /* + * If the pool was depleted, point at the new page. + */ if (pp->pr_curpage == NULL) pp->pr_curpage = ph; - TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist); - if (!POOL_INPGHDR(pp)) - RB_INSERT(phtree, &pp->pr_phtree, ph); - - pp->pr_nitems += pp->pr_itemsperpage; - pp->pr_nidle++; - - pp->pr_npagealloc++; if (++pp->pr_npages > pp->pr_hiwat) pp->pr_hiwat = pp->pr_npages; } -void -pool_p_remove(struct pool *pp, struct pool_item_header *ph) +/* + * Used by pool_get() when nitems drops below the low water mark. This + * is used to catch up pr_nitems with the low water mark. + * + * Note we never wait for memory here, we let the caller decide what to do. + */ +int +pool_catchup(struct pool *pp) { - MUTEX_ASSERT_LOCKED(&pp->pr_mtx); - - pp->pr_npagefree++; - pp->pr_npages--; - pp->pr_nidle--; - pp->pr_nitems -= pp->pr_itemsperpage; + struct pool_item_header *ph; + caddr_t cp; + int error = 0; + int slowdown; - if (!POOL_INPGHDR(pp)) - RB_REMOVE(phtree, &pp->pr_phtree, ph); - TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist); + while (POOL_NEEDS_CATCHUP(pp)) { + /* + * Call the page back-end allocator for more memory. 
+ */ + cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown); + if (cp != NULL) + ph = pool_alloc_item_header(pp, cp, PR_NOWAIT); + if (cp == NULL || ph == NULL) { + if (cp != NULL) + pool_allocator_free(pp, cp); + error = ENOMEM; + break; + } + pool_prime_page(pp, cp, ph); + pp->pr_npagealloc++; + } - pool_update_curpage(pp); + return (error); } void pool_update_curpage(struct pool *pp) { - pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist); + + pp->pr_curpage = LIST_FIRST(&pp->pr_partpages); if (pp->pr_curpage == NULL) { - pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist); + pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages); } } void +pool_swizzle_curpage(struct pool *pp) +{ + struct pool_item_header *ph, *next; + + if ((ph = pp->pr_curpage) == NULL) + return; + if (arc4random_uniform(16) != 0) + return; + next = LIST_FIRST(&pp->pr_partpages); + if (next == ph) + next = LIST_NEXT(next, ph_pagelist); + if (next == NULL) { + next = LIST_FIRST(&pp->pr_emptypages); + if (next == ph) + next = LIST_NEXT(next, ph_pagelist); + } + if (next != NULL) + pp->pr_curpage = next; +} + +void pool_setlowat(struct pool *pp, int n) { - int prime = 0; - mtx_enter(&pp->pr_mtx); pp->pr_minitems = n; pp->pr_minpages = (n == 0) ? 0 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; - if (pp->pr_nitems < n) - prime = n - pp->pr_nitems; + mtx_enter(&pp->pr_mtx); + /* Make sure we're caught up with the newly-set low water mark. */ + if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { + /* + * XXX: Should we log a warning? Should we set up a timeout + * to try again in a second or so? The latter could break + * a caller's assumptions about interrupt protection, etc. + */ + } mtx_leave(&pp->pr_mtx); - - if (prime > 0) - pool_prime(pp, prime); } void pool_sethiwat(struct pool *pp, int n) { + pp->pr_maxpages = (n == 0) ? 0 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; @@ -928,16 +1065,20 @@ int pool_reclaim(struct pool *pp) { struct pool_item_header *ph, *phnext; - struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl); + struct pool_pagelist pq; + + LIST_INIT(&pq); mtx_enter(&pp->pr_mtx); - for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { - phnext = TAILQ_NEXT(ph, ph_pagelist); + for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { + phnext = LIST_NEXT(ph, ph_pagelist); /* Check our minimum page claim */ if (pp->pr_npages <= pp->pr_minpages) break; + KASSERT(ph->ph_nmissing == 0); + /* * If freeing this page would put us below * the low water mark, stop now. @@ -946,17 +1087,18 @@ pool_reclaim(struct pool *pp) pp->pr_minitems) break; - pool_p_remove(pp, ph); - TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist); + pr_rmpage(pp, ph, &pq); } mtx_leave(&pp->pr_mtx); - if (TAILQ_EMPTY(&pl)) + if (LIST_EMPTY(&pq)) return (0); - - while ((ph = TAILQ_FIRST(&pl)) != NULL) { - TAILQ_REMOVE(&pl, ph, ph_pagelist); - pool_p_free(pp, ph); + while ((ph = LIST_FIRST(&pq)) != NULL) { + LIST_REMOVE(ph, ph_pagelist); + pool_allocator_free(pp, ph->ph_page); + if (pp->pr_roflags & PR_PHINPAGE) + continue; + pool_put(&phpool, ph); } return (1); @@ -970,11 +1112,12 @@ void pool_reclaim_all(void) { struct pool *pp; + int s; - rw_enter_read(&pool_lock); + s = splhigh(); SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) pool_reclaim(pp); - rw_exit_read(&pool_lock); + splx(s); } #ifdef DDB @@ -997,17 +1140,21 @@ pool_print_pagelist(struct pool_pagelist int (*pr)(const char *, ...) 
__attribute__((__format__(__kprintf__,1,2)))) { struct pool_item_header *ph; +#ifdef DIAGNOSTIC struct pool_item *pi; +#endif - TAILQ_FOREACH(ph, pl, ph_pagelist) { + LIST_FOREACH(ph, pl, ph_pagelist) { (*pr)("\t\tpage %p, nmissing %d\n", ph->ph_page, ph->ph_nmissing); +#ifdef DIAGNOSTIC XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) { - if (pi->pi_magic != POOL_IMAGIC(ph, pi)) { - (*pr)("\t\t\titem %p, magic 0x%lx\n", + if (pi->pi_magic != poison_value(pi)) { + (*pr)("\t\t\titem %p, magic 0x%x\n", pi, pi->pi_magic); } } +#endif } } @@ -1025,7 +1172,9 @@ pool_print1(struct pool *pp, const char modif++; } - (*pr)("POOL %s: size %u\n", pp->pr_wchan, pp->pr_size); + (*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n", + pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset, + pp->pr_roflags); (*pr)("\talloc %p\n", pp->pr_alloc); (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); @@ -1040,13 +1189,13 @@ pool_print1(struct pool *pp, const char if (print_pagelist == 0) return; - if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) + if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) (*pr)("\n\tempty page list:\n"); pool_print_pagelist(&pp->pr_emptypages, pr); - if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL) + if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL) (*pr)("\n\tfull page list:\n"); pool_print_pagelist(&pp->pr_fullpages, pr); - if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL) + if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL) (*pr)("\n\tpartial-page list:\n"); pool_print_pagelist(&pp->pr_partpages, pr); @@ -1140,8 +1289,9 @@ pool_chk_page(struct pool *pp, struct po int n; const char *label = pp->pr_wchan; - page = (caddr_t)((u_long)ph & pp->pr_pgmask); - if (page != ph->ph_page && POOL_INPGHDR(pp)) { + page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask); + if (page != ph->ph_page && + (pp->pr_roflags & PR_PHINPAGE) != 0) { printf("%s: ", label); printf("pool(%p:%s): page inconsistency: page %p; " "at page head addr %p (p %p)\n", @@ -1152,17 +1302,17 @@ pool_chk_page(struct pool *pp, struct po for (pi = XSIMPLEQ_FIRST(&ph->ph_itemlist), n = 0; pi != NULL; pi = XSIMPLEQ_NEXT(&ph->ph_itemlist, pi, pi_list), n++) { - if (pi->pi_magic != POOL_IMAGIC(ph, pi)) { + +#ifdef DIAGNOSTIC + if (pi->pi_magic != poison_value(pi)) { printf("%s: ", label); - printf("pool(%p:%s): free list modified: " + printf("pool(%s): free list modified: " "page %p; item ordinal %d; addr %p " - "(p %p); offset 0x%x=0x%lx\n", - pp, pp->pr_wchan, ph->ph_page, n, pi, page, + "(p %p); offset 0x%x=0x%x\n", + pp->pr_wchan, ph->ph_page, n, pi, page, 0, pi->pi_magic); } - -#ifdef DIAGNOSTIC - if (POOL_PHPOISON(ph)) { + if (pool_debug && ph->ph_magic) { size_t pidx; uint32_t pval; if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), @@ -1176,8 +1326,8 @@ pool_chk_page(struct pool *pp, struct po } } #endif /* DIAGNOSTIC */ - - page = (caddr_t)((u_long)pi & pp->pr_pgmask); + page = + (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask); if (page == ph->ph_page) continue; @@ -1210,11 +1360,11 @@ pool_chk(struct pool *pp) struct pool_item_header *ph; int r = 0; - TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) + LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) r += pool_chk_page(pp, ph, pp->pr_itemsperpage); - TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) + LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) r += pool_chk_page(pp, ph, 0); - TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist) + LIST_FOREACH(ph, &pp->pr_partpages, 
ph_pagelist) r += pool_chk_page(pp, ph, -1); return (r); @@ -1233,8 +1383,8 @@ pool_walk(struct pool *pp, int full, caddr_t cp; int n; - TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { - cp = ph->ph_page; + LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { + cp = ph->ph_colored; n = ph->ph_nmissing; while (n--) { @@ -1243,8 +1393,8 @@ pool_walk(struct pool *pp, int full, } } - TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { - cp = ph->ph_page; + LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { + cp = ph->ph_colored; n = ph->ph_nmissing; do { @@ -1270,51 +1420,62 @@ pool_walk(struct pool *pp, int full, * kern.pool.name. - the name for pool#. */ int -sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp) +sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep) { struct kinfo_pool pi; struct pool *pp; - int rv = ENOENT; + size_t buflen = where != NULL ? *sizep : 0; + int npools = 0, s; + unsigned int lookfor; + size_t len; - switch (name[0]) { + switch (*name) { case KERN_POOL_NPOOLS: - if (namelen != 1) - return (ENOTDIR); - return (sysctl_rdint(oldp, oldlenp, NULL, pool_count)); - + if (namelen != 1 || buflen != sizeof(int)) + return (EINVAL); + lookfor = 0; + break; case KERN_POOL_NAME: + if (namelen != 2 || buflen < 1) + return (EINVAL); + lookfor = name[1]; + break; case KERN_POOL_POOL: + if (namelen != 2 || buflen != sizeof(pi)) + return (EINVAL); + lookfor = name[1]; break; default: - return (EOPNOTSUPP); + return (EINVAL); } - if (namelen != 2) - return (ENOTDIR); - - rw_enter_read(&pool_lock); + s = splvm(); SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { - if (name[1] == pp->pr_serial) + npools++; + if (lookfor == pp->pr_serial) break; } - if (pp == NULL) - goto done; + splx(s); - switch (name[0]) { + if (*name != KERN_POOL_NPOOLS && pp == NULL) + return (ENOENT); + + switch (*name) { + case KERN_POOL_NPOOLS: + return copyout(&npools, where, buflen); case KERN_POOL_NAME: - rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan); - break; + len = strlen(pp->pr_wchan) + 1; + if (*sizep < len) + return (ENOMEM); + *sizep = len; + return copyout(pp->pr_wchan, where, len); case KERN_POOL_POOL: memset(&pi, 0, sizeof(pi)); - - if (pp->pr_ipl != -1) - mtx_enter(&pp->pr_mtx); pi.pr_size = pp->pr_size; - pi.pr_pgsize = pp->pr_pgsize; + pi.pr_pgsize = pp->pr_alloc->pa_pagesz; pi.pr_itemsperpage = pp->pr_itemsperpage; - pi.pr_npages = pp->pr_npages; pi.pr_minpages = pp->pr_minpages; pi.pr_maxpages = pp->pr_maxpages; pi.pr_hardlimit = pp->pr_hardlimit; @@ -1327,41 +1488,54 @@ sysctl_dopool(int *name, u_int namelen, pi.pr_npagefree = pp->pr_npagefree; pi.pr_hiwat = pp->pr_hiwat; pi.pr_nidle = pp->pr_nidle; - if (pp->pr_ipl != -1) - mtx_leave(&pp->pr_mtx); - - rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi)); - break; + return copyout(&pi, where, buflen); } - -done: - rw_exit_read(&pool_lock); - - return (rv); + /* NOTREACHED */ + return (0); /* XXX - Stupid gcc */ } /* * Pool backend allocators. + * + * Each pool has a backend allocator that handles allocation, deallocation + */ +void *pool_page_alloc(struct pool *, int, int *); +void pool_page_free(struct pool *, void *); + +/* + * safe for interrupts, name preserved for compat this is the default + * allocator + */ +struct pool_allocator pool_allocator_nointr = { + pool_page_alloc, pool_page_free, 0, +}; + +/* + * XXX - we have at least three different resources for the same allocation + * and each resource can be depleted. First we have the ready elements in + * the pool. 
Then we have the resource (typically a vm_map) for this + * allocator, then we have physical memory. Waiting for any of these can + * be unnecessary when any other is freed, but the kernel doesn't support + * sleeping on multiple addresses, so we have to fake. The caller sleeps on + * the pool (so that we can be awakened when an item is returned to the pool), + * but we set PA_WANT on the allocator. When a page is returned to + * the allocator and PA_WANT is set pool_allocator_free will wakeup all + * sleeping pools belonging to this allocator. (XXX - thundering herd). + * We also wake up the allocator in case someone without a pool (malloc) + * is sleeping waiting for this allocator. */ void * pool_allocator_alloc(struct pool *pp, int flags, int *slowdown) { + int waitok = flags & PR_WAITOK; void *v; - KERNEL_LOCK(); - v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown); - KERNEL_UNLOCK(); - -#ifdef DIAGNOSTIC - if (v != NULL && POOL_INPGHDR(pp)) { - vaddr_t addr = (vaddr_t)v; - if ((addr & pp->pr_pgmask) != addr) { - panic("%s: %s page address %p isnt aligned to %u", - __func__, pp->pr_wchan, v, pp->pr_pgsize); - } - } -#endif + if (waitok) + mtx_leave(&pp->pr_mtx); + v = pp->pr_alloc->pa_alloc(pp, flags, slowdown); + if (waitok) + mtx_enter(&pp->pr_mtx); return (v); } @@ -1371,9 +1545,7 @@ pool_allocator_free(struct pool *pp, voi { struct pool_allocator *pa = pp->pr_alloc; - KERNEL_LOCK(); (*pa->pa_free)(pp, v); - KERNEL_UNLOCK(); } void * @@ -1381,34 +1553,31 @@ pool_page_alloc(struct pool *pp, int fla { struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; - kd.kd_waitok = ISSET(flags, PR_WAITOK); + kd.kd_waitok = (flags & PR_WAITOK); kd.kd_slowdown = slowdown; - return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd)); + return (km_alloc(PAGE_SIZE, &kv_page, pp->pr_crange, &kd)); } void pool_page_free(struct pool *pp, void *v) { - km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange); + km_free(v, PAGE_SIZE, &kv_page, pp->pr_crange); } void * pool_large_alloc(struct pool *pp, int flags, int *slowdown) { - struct kmem_va_mode kv = kv_intrsafe; struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; void *v; int s; - if (POOL_INPGHDR(pp)) - kv.kv_align = pp->pr_pgsize; - - kd.kd_waitok = ISSET(flags, PR_WAITOK); + kd.kd_waitok = (flags & PR_WAITOK); kd.kd_slowdown = slowdown; s = splvm(); - v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd); + v = km_alloc(pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange, + &kd); splx(s); return (v); @@ -1417,39 +1586,26 @@ pool_large_alloc(struct pool *pp, int fl void pool_large_free(struct pool *pp, void *v) { - struct kmem_va_mode kv = kv_intrsafe; int s; - if (POOL_INPGHDR(pp)) - kv.kv_align = pp->pr_pgsize; - s = splvm(); - km_free(v, pp->pr_pgsize, &kv, pp->pr_crange); + km_free(v, pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange); splx(s); } void * pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown) { - struct kmem_va_mode kv = kv_any; struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; - if (POOL_INPGHDR(pp)) - kv.kv_align = pp->pr_pgsize; - - kd.kd_waitok = ISSET(flags, PR_WAITOK); + kd.kd_waitok = (flags & PR_WAITOK); kd.kd_slowdown = slowdown; - return (km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd)); + return (km_alloc(pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange, &kd)); } void pool_large_free_ni(struct pool *pp, void *v) { - struct kmem_va_mode kv = kv_any; - - if (POOL_INPGHDR(pp)) - kv.kv_align = pp->pr_pgsize; - - km_free(v, pp->pr_pgsize, &kv, pp->pr_crange); + km_free(v, pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange); } 
Index: kern/uipc_mbuf.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v retrieving revision 1.199 diff -u -p -r1.199 uipc_mbuf.c --- kern/uipc_mbuf.c 11 Dec 2014 19:21:57 -0000 1.199 +++ kern/uipc_mbuf.c 7 Jan 2015 03:37:52 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_mbuf.c,v 1.199 2014/12/11 19:21:57 tedu Exp $ */ +/* $OpenBSD: uipc_mbuf.c,v 1.192 2014/07/13 15:52:38 tedu Exp $ */ /* $NetBSD: uipc_mbuf.c,v 1.15.4.1 1996/06/13 17:11:44 cgd Exp $ */ /* @@ -74,6 +74,7 @@ #include #include +#include #include #include #include @@ -95,7 +96,6 @@ #endif struct mbstat mbstat; /* mbuf stats */ -struct mutex mbstatmtx = MUTEX_INITIALIZER(IPL_NET); struct pool mbpool; /* mbuf pool */ struct pool mtagpool; @@ -118,8 +118,6 @@ int max_linkhdr; /* largest link-level int max_protohdr; /* largest protocol header */ int max_hdr; /* largest link+protocol header */ -struct mutex m_extref_mtx = MUTEX_INITIALIZER(IPL_NET); - void m_extfree(struct mbuf *); struct mbuf *m_copym0(struct mbuf *, int, int, int, int); void nmbclust_update(void); @@ -138,14 +136,11 @@ mbinit(void) int i; #if DIAGNOSTIC - if (mclsizes[0] != MCLBYTES) - panic("mbinit: the smallest cluster size != MCLBYTES"); if (mclsizes[nitems(mclsizes) - 1] != MAXMCLBYTES) panic("mbinit: the largest cluster size != MAXMCLBYTES"); #endif - pool_init(&mbpool, MSIZE, 0, 0, 0, "mbufpl", NULL); - pool_setipl(&mbpool, IPL_NET); + pool_init(&mbpool, MSIZE, 0, 0, 0, "mbpl", NULL); pool_set_constraints(&mbpool, &kp_dma_contig); pool_setlowat(&mbpool, mblowat); @@ -158,7 +153,6 @@ mbinit(void) mclsizes[i] >> 10); pool_init(&mclpools[i], mclsizes[i], 0, 0, 0, mclnames[i], NULL); - pool_setipl(&mclpools[i], IPL_NET); pool_set_constraints(&mclpools[i], &kp_dma_contig); pool_setlowat(&mclpools[i], mcllowat); } @@ -190,6 +184,21 @@ nmbclust_update(void) pool_sethiwat(&mbpool, nmbclust); } +void +m_reclaim(void *arg, int flags) +{ + struct domain *dp; + struct protosw *pr; + int s = splnet(); + + for (dp = domains; dp; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_drain) + (*pr->pr_drain)(); + mbstat.m_drain++; + splx(s); +} + /* * Space allocation routines. */ @@ -197,21 +206,20 @@ struct mbuf * m_get(int nowait, int type) { struct mbuf *m; + int s; + s = splnet(); m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT); - if (m == NULL) - return (NULL); - - mtx_enter(&mbstatmtx); - mbstat.m_mtypes[type]++; - mtx_leave(&mbstatmtx); - - m->m_type = type; - m->m_next = NULL; - m->m_nextpkt = NULL; - m->m_data = m->m_dat; - m->m_flags = 0; - + if (m) + mbstat.m_mtypes[type]++; + splx(s); + if (m) { + m->m_type = type; + m->m_next = NULL; + m->m_nextpkt = NULL; + m->m_data = m->m_dat; + m->m_flags = 0; + } return (m); } @@ -223,18 +231,25 @@ struct mbuf * m_gethdr(int nowait, int type) { struct mbuf *m; + int s; + s = splnet(); m = pool_get(&mbpool, nowait == M_WAIT ? 
PR_WAITOK : PR_NOWAIT); - if (m == NULL) - return (NULL); - - mtx_enter(&mbstatmtx); - mbstat.m_mtypes[type]++; - mtx_leave(&mbstatmtx); + if (m) + mbstat.m_mtypes[type]++; + splx(s); + if (m) { + m->m_type = type; - m->m_type = type; - - return (m_inithdr(m)); + /* keep in sync with m_inithdr */ + m->m_next = NULL; + m->m_nextpkt = NULL; + m->m_data = m->m_pktdat; + m->m_flags = M_PKTHDR; + memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr)); + m->m_pkthdr.pf.prio = IFQ_DEFPRIO; + } + return (m); } struct mbuf * @@ -284,6 +299,7 @@ m_clget(struct mbuf *m, int how, struct struct mbuf *m0 = NULL; struct pool *pp; caddr_t buf; + int s; pp = m_clpool(pktlen); #ifdef DIAGNOSTIC @@ -291,19 +307,23 @@ m_clget(struct mbuf *m, int how, struct panic("m_clget: request for %u byte cluster", pktlen); #endif + s = splnet(); if (m == NULL) { - m0 = m_gethdr(how, MT_DATA); - if (m0 == NULL) + MGETHDR(m0, M_DONTWAIT, MT_DATA); + if (m0 == NULL) { + splx(s); return (NULL); - + } m = m0; } buf = pool_get(pp, how == M_WAIT ? PR_WAITOK : PR_NOWAIT); if (buf == NULL) { if (m0) m_freem(m0); + splx(s); return (NULL); } + splx(s); MEXTADD(m, buf, pp->pr_size, M_EXTWR, m_extfree_pool, pp); return (m); @@ -312,18 +332,16 @@ m_clget(struct mbuf *m, int how, struct void m_extfree_pool(caddr_t buf, u_int size, void *pp) { + splassert(IPL_NET); pool_put(pp, buf); } struct mbuf * -m_free(struct mbuf *m) +m_free_unlocked(struct mbuf *m) { struct mbuf *n; - mtx_enter(&mbstatmtx); mbstat.m_mtypes[m->m_type]--; - mtx_leave(&mbstatmtx); - n = m->m_next; if (m->m_flags & M_ZEROIZE) { m_zero(m); @@ -335,68 +353,54 @@ m_free(struct mbuf *m) m_tag_delete_chain(m); if (m->m_flags & M_EXT) m_extfree(m); - pool_put(&mbpool, m); return (n); } -void -m_extref(struct mbuf *o, struct mbuf *n) +struct mbuf * +m_free(struct mbuf *m) { - int refs = MCLISREFERENCED(o); - - n->m_flags |= o->m_flags & (M_EXT|M_EXTWR); + struct mbuf *n; + int s; - if (refs) - mtx_enter(&m_extref_mtx); - n->m_ext.ext_nextref = o->m_ext.ext_nextref; - n->m_ext.ext_prevref = o; - o->m_ext.ext_nextref = n; - n->m_ext.ext_nextref->m_ext.ext_prevref = n; - if (refs) - mtx_leave(&m_extref_mtx); + s = splnet(); + n = m_free_unlocked(m); + splx(s); - MCLREFDEBUGN((n), __FILE__, __LINE__); + return (n); } -static inline u_int -m_extunref(struct mbuf *m) +void +m_extfree(struct mbuf *m) { - int refs = 1; - - if (!MCLISREFERENCED(m)) - return (0); - - mtx_enter(&m_extref_mtx); if (MCLISREFERENCED(m)) { m->m_ext.ext_nextref->m_ext.ext_prevref = m->m_ext.ext_prevref; m->m_ext.ext_prevref->m_ext.ext_nextref = m->m_ext.ext_nextref; - } else - refs = 0; - mtx_leave(&m_extref_mtx); - - return (refs); -} - -void -m_extfree(struct mbuf *m) -{ - if (m_extunref(m) == 0) { + } else if (m->m_ext.ext_free) (*(m->m_ext.ext_free))(m->m_ext.ext_buf, m->m_ext.ext_size, m->m_ext.ext_arg); - } - + else + panic("unknown type of extension buffer"); + m->m_ext.ext_size = 0; m->m_flags &= ~(M_EXT|M_EXTWR); } void m_freem(struct mbuf *m) { - while (m != NULL) - m = m_free(m); + struct mbuf *n; + int s; + + if (m == NULL) + return; + s = splnet(); + do { + n = m_free_unlocked(m); + } while ((m = n) != NULL); + splx(s); } /* @@ -432,8 +436,12 @@ m_defrag(struct mbuf *m, int how) /* free chain behind and possible ext buf on the first mbuf */ m_freem(m->m_next); m->m_next = NULL; - if (m->m_flags & M_EXT) + + if (m->m_flags & M_EXT) { + int s = splnet(); m_extfree(m); + splx(s); + } /* * Bounce copy mbuf over to the original mbuf and set everything up. 
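
Note on the uipc_mbuf.c hunks above: m_get(), m_gethdr(), m_clget(), m_free() and m_freem() take splnet() internally again, but the reintroduced m_free_unlocked() (and a bare m_extfree() call, as in the m_defrag() hunk) leave that to the caller. A minimal sketch of caller-side use follows; example_recycle() is a hypothetical helper, not part of the diff.

/*
 * Minimal sketch, not part of the diff: m_free_unlocked() expects
 * the caller to be at splnet() already.  example_recycle() is a
 * hypothetical helper.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

struct mbuf *
example_recycle(struct mbuf *m)
{
	struct mbuf *n;
	int s;

	s = splnet();			/* m_free_unlocked() trusts the caller */
	n = m_free_unlocked(m);		/* frees m, returns m->m_next */
	splx(s);

	return (n);
}
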
@@ -606,7 +614,7 @@ m_copydata(struct mbuf *m, int off, int if (m == NULL) panic("m_copydata: null mbuf"); count = min(m->m_len - off, len); - memmove(cp, mtod(m, caddr_t) + off, count); + bcopy(mtod(m, caddr_t) + off, cp, count); len -= count; cp += count; off = 0; @@ -659,7 +667,7 @@ m_copyback(struct mbuf *m0, int off, int m->m_len += min(len - (m->m_len - off), M_TRAILINGSPACE(m)); mlen = min(m->m_len - off, len); - memmove(mtod(m, caddr_t) + off, cp, mlen); + bcopy(cp, mtod(m, caddr_t) + off, (size_t)mlen); cp += mlen; len -= mlen; totlen += mlen + off; @@ -712,8 +720,8 @@ m_cat(struct mbuf *m, struct mbuf *n) return; } /* splat the data from one into the other */ - memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t), - n->m_len); + bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, + (u_int)n->m_len); m->m_len += n->m_len; n = m_free(n); } @@ -841,8 +849,8 @@ m_pullup(struct mbuf *n, int len) do { count = min(len, n->m_len); - memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t), - count); + bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, + (unsigned)count); len -= count; m->m_len += count; n->m_len -= count; @@ -1012,7 +1020,7 @@ extpacket: MCLADDREFERENCE(m, n); n->m_data = m->m_data + len; } else { - memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain); + bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); } n->m_len = remain; m->m_len = len; @@ -1233,156 +1241,6 @@ m_print(void *v, m->m_ext.ext_free, m->m_ext.ext_arg); (*pr)("m_ext.ext_nextref: %p\tm_ext.ext_prevref: %p\n", m->m_ext.ext_nextref, m->m_ext.ext_prevref); - } } #endif - -/* - * mbuf lists - */ - -void ml_join(struct mbuf_list *, struct mbuf_list *); - -void -ml_init(struct mbuf_list *ml) -{ - ml->ml_head = ml->ml_tail = NULL; - ml->ml_len = 0; -} - -void -ml_enqueue(struct mbuf_list *ml, struct mbuf *m) -{ - if (ml->ml_tail == NULL) - ml->ml_head = ml->ml_tail = m; - else { - ml->ml_tail->m_nextpkt = m; - ml->ml_tail = m; - } - - m->m_nextpkt = NULL; - ml->ml_len++; -} - -void -ml_join(struct mbuf_list *mla, struct mbuf_list *mlb) -{ - if (mla->ml_tail == NULL) - *mla = *mlb; - else if (mlb->ml_tail != NULL) { - mla->ml_tail->m_nextpkt = mlb->ml_head; - mla->ml_tail = mlb->ml_tail; - mla->ml_len += mlb->ml_len; - - ml_init(mlb); - } -} - -struct mbuf * -ml_dequeue(struct mbuf_list *ml) -{ - struct mbuf *m; - - m = ml->ml_head; - if (m != NULL) { - ml->ml_head = m->m_nextpkt; - if (ml->ml_head == NULL) - ml->ml_tail = NULL; - - m->m_nextpkt = NULL; - ml->ml_len--; - } - - return (m); -} - -struct mbuf * -ml_dechain(struct mbuf_list *ml) -{ - struct mbuf *m0; - - m0 = ml->ml_head; - - ml_init(ml); - - return (m0); -} - -/* - * mbuf queues - */ - -void -mq_init(struct mbuf_queue *mq, u_int maxlen, int ipl) -{ - mtx_init(&mq->mq_mtx, ipl); - ml_init(&mq->mq_list); - mq->mq_maxlen = maxlen; -} - -int -mq_enqueue(struct mbuf_queue *mq, struct mbuf *m) -{ - int dropped = 0; - - mtx_enter(&mq->mq_mtx); - if (mq_len(mq) < mq->mq_maxlen) - ml_enqueue(&mq->mq_list, m); - else { - mq->mq_drops++; - dropped = 1; - } - mtx_leave(&mq->mq_mtx); - - if (dropped) - m_freem(m); - - return (dropped); -} - -struct mbuf * -mq_dequeue(struct mbuf_queue *mq) -{ - struct mbuf *m; - - mtx_enter(&mq->mq_mtx); - m = ml_dequeue(&mq->mq_list); - mtx_leave(&mq->mq_mtx); - - return (m); -} - -int -mq_enlist(struct mbuf_queue *mq, struct mbuf_list *ml) -{ - int full; - - mtx_enter(&mq->mq_mtx); - ml_join(&mq->mq_list, ml); - full = mq_len(mq) >= mq->mq_maxlen; - mtx_leave(&mq->mq_mtx); - - return (full); -} - -void -mq_delist(struct 
mbuf_queue *mq, struct mbuf_list *ml) -{ - mtx_enter(&mq->mq_mtx); - *ml = mq->mq_list; - ml_init(&mq->mq_list); - mtx_leave(&mq->mq_mtx); -} - -struct mbuf * -mq_dechain(struct mbuf_queue *mq) -{ - struct mbuf *m0; - - mtx_enter(&mq->mq_mtx); - m0 = ml_dechain(&mq->mq_list); - mtx_leave(&mq->mq_mtx); - - return (m0); -} Index: sys/mbuf.h =================================================================== RCS file: /cvs/src/sys/sys/mbuf.h,v retrieving revision 1.183 diff -u -p -r1.183 mbuf.h --- sys/mbuf.h 3 Oct 2014 01:02:47 -0000 1.183 +++ sys/mbuf.h 7 Jan 2015 03:37:52 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: mbuf.h,v 1.183 2014/10/03 01:02:47 dlg Exp $ */ +/* $OpenBSD: mbuf.h,v 1.180 2014/07/13 09:52:48 dlg Exp $ */ /* $NetBSD: mbuf.h,v 1.19 1996/02/09 18:25:14 christos Exp $ */ /* @@ -36,6 +36,7 @@ #define _SYS_MBUF_H_ #include +#include #include /* @@ -245,6 +246,8 @@ struct mbuf { /* * Macros for tracking external storage associated with an mbuf. + * + * Note: add and delete reference must be called at splnet(). */ #ifdef DEBUG #define MCLREFDEBUGN(m, file, line) do { \ @@ -262,7 +265,16 @@ struct mbuf { #define MCLISREFERENCED(m) ((m)->m_ext.ext_nextref != (m)) -#define MCLADDREFERENCE(o, n) m_extref((o), (n)) +#define MCLADDREFERENCE(o, n) do { \ + int ms = splnet(); \ + (n)->m_flags |= ((o)->m_flags & (M_EXT|M_EXTWR)); \ + (n)->m_ext.ext_nextref = (o)->m_ext.ext_nextref; \ + (n)->m_ext.ext_prevref = (o); \ + (o)->m_ext.ext_nextref = (n); \ + (n)->m_ext.ext_nextref->m_ext.ext_prevref = (n); \ + splx(ms); \ + MCLREFDEBUGN((n), __FILE__, __LINE__); \ + } while (/* CONSTCOND */ 0) #define MCLINITREFERENCE(m) do { \ (m)->m_ext.ext_prevref = (m); \ @@ -400,6 +412,7 @@ void mbinit(void); struct mbuf *m_copym2(struct mbuf *, int, int, int); struct mbuf *m_copym(struct mbuf *, int, int, int); struct mbuf *m_free(struct mbuf *); +struct mbuf *m_free_unlocked(struct mbuf *); struct mbuf *m_get(int, int); struct mbuf *m_getclr(int, int); struct mbuf *m_gethdr(int, int); @@ -414,7 +427,6 @@ struct mbuf *m_getptr(struct mbuf *, in int m_leadingspace(struct mbuf *); int m_trailingspace(struct mbuf *); struct mbuf *m_clget(struct mbuf *, int, struct ifnet *, u_int); -void m_extref(struct mbuf *, struct mbuf *); void m_extfree_pool(caddr_t, u_int, void *); void m_adj(struct mbuf *, int); int m_copyback(struct mbuf *, int, int, const void *, int); @@ -467,57 +479,6 @@ struct m_tag *m_tag_next(struct mbuf *, * has payload larger than the value below. 
*/ #define PACKET_TAG_MAXSIZE 52 - -/* - * mbuf lists - */ - -#include - -struct mbuf_list { - struct mbuf *ml_head; - struct mbuf *ml_tail; - u_int ml_len; -}; - -#define MBUF_LIST_INITIALIZER() { NULL, NULL, 0 } - -void ml_init(struct mbuf_list *); -void ml_enqueue(struct mbuf_list *, struct mbuf *); -struct mbuf * ml_dequeue(struct mbuf_list *); -struct mbuf * ml_dechain(struct mbuf_list *); - -#define ml_len(_ml) ((_ml)->ml_len) -#define ml_empty(_ml) ((_ml)->ml_len == 0) - -#define MBUF_LIST_FOREACH(_ml, _m) \ - for ((_m) = (_ml)->ml_head; (_m) != NULL; (_m) = (_m)->m_nextpkt) - -/* - * mbuf queues - */ - -struct mbuf_queue { - struct mutex mq_mtx; - struct mbuf_list mq_list; - u_int mq_maxlen; - u_int mq_drops; -}; - -#define MBUF_QUEUE_INITIALIZER(_maxlen, _ipl) \ - { MUTEX_INITIALIZER(_ipl), MBUF_LIST_INITIALIZER(), (_maxlen), 0 } - -void mq_init(struct mbuf_queue *, u_int, int); -int mq_enqueue(struct mbuf_queue *, struct mbuf *); -struct mbuf * mq_dequeue(struct mbuf_queue *); -int mq_enlist(struct mbuf_queue *, struct mbuf_list *); -void mq_delist(struct mbuf_queue *, struct mbuf_list *); -struct mbuf * mq_dechain(struct mbuf_queue *); - -#define mq_len(_mq) ml_len(&(_mq)->mq_list) -#define mq_empty(_mq) ml_empty(&(_mq)->mq_list) -#define mq_drops(_mq) ((_mq)->mq_drops) -#define mq_set_maxlen(_mq, _l) ((_mq)->mq_maxlen = (_l)) #endif /* _KERNEL */ #endif /* _SYS_MBUF_H_ */ Index: sys/pool.h =================================================================== RCS file: /cvs/src/sys/sys/pool.h,v retrieving revision 1.56 diff -u -p -r1.56 pool.h --- sys/pool.h 22 Dec 2014 02:59:54 -0000 1.56 +++ sys/pool.h 7 Jan 2015 03:37:52 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: pool.h,v 1.56 2014/12/22 02:59:54 tedu Exp $ */ +/* $OpenBSD: pool.h,v 1.47 2014/07/02 00:23:36 dlg Exp $ */ /* $NetBSD: pool.h,v 1.27 2001/06/06 22:00:17 rafal Exp $ */ /*- @@ -73,16 +73,16 @@ struct kinfo_pool { #include struct pool; -struct pool_request; -TAILQ_HEAD(pool_requests, pool_request); struct pool_allocator { void *(*pa_alloc)(struct pool *, int, int *); void (*pa_free)(struct pool *, void *); int pa_pagesz; + int pa_pagemask; + int pa_pageshift; }; -TAILQ_HEAD(pool_pagelist, pool_item_header); +LIST_HEAD(pool_pagelist, pool_item_header); struct pool { struct mutex pr_mtx; @@ -97,6 +97,8 @@ struct pool { struct pool_item_header * pr_curpage; unsigned int pr_size; /* Size of item */ + unsigned int pr_align; /* Requested alignment, must be 2^n */ + unsigned int pr_itemoffset; /* Align this offset in item */ unsigned int pr_minitems; /* minimum # of items to keep */ unsigned int pr_minpages; /* same in page units */ unsigned int pr_maxpages; /* maximum # of idle pages to keep */ @@ -108,16 +110,19 @@ struct pool { unsigned int pr_hardlimit; /* hard limit to number of allocated items */ unsigned int pr_serial; /* unique serial number of the pool */ - unsigned int pr_pgsize; /* Size of a "page" */ - vaddr_t pr_pgmask; /* Mask with an item to get a page */ struct pool_allocator * pr_alloc; /* backend allocator */ const char * pr_wchan; /* tsleep(9) identifier */ + unsigned int pr_flags; /* r/w flags */ + unsigned int pr_roflags; /* r/o flags */ #define PR_WAITOK 0x0001 /* M_WAITOK */ #define PR_NOWAIT 0x0002 /* M_NOWAIT */ #define PR_LIMITFAIL 0x0004 /* M_CANFAIL */ #define PR_ZERO 0x0008 /* M_ZERO */ #define PR_WANTED 0x0100 +#define PR_PHINPAGE 0x0200 +#define PR_LOGGING 0x0400 +#define PR_DEBUG 0x0800 #define PR_DEBUGCHK 0x1000 int pr_ipl; @@ -125,6 +130,8 @@ struct pool { RB_HEAD(phtree, pool_item_header) 
pr_phtree; + int pr_maxcolor; /* Cache colouring */ + int pr_curcolor; int pr_phoffset; /* Offset in page of page header */ /* @@ -136,14 +143,6 @@ struct pool { struct timeval pr_hardlimit_warning_last; /* - * pool item requests queue - */ - struct mutex pr_requests_mtx; - struct pool_requests - pr_requests; - unsigned int pr_requesting; - - /* * Instrumentation */ unsigned long pr_nget; /* # of successful requests */ @@ -165,13 +164,7 @@ struct pool { extern struct pool_allocator pool_allocator_nointr; -struct pool_request { - TAILQ_ENTRY(pool_request) pr_entry; - void (*pr_handler)(void *, void *); - void *pr_cookie; - void *pr_item; -}; - +/* these functions are not locked */ void pool_init(struct pool *, size_t, u_int, u_int, int, const char *, struct pool_allocator *); void pool_destroy(struct pool *); @@ -183,10 +176,8 @@ struct uvm_constraint_range; /* XXX */ void pool_set_constraints(struct pool *, const struct kmem_pa_mode *mode); +/* these functions are locked */ void *pool_get(struct pool *, int) __malloc; -void pool_request_init(struct pool_request *, - void (*)(void *, void *), void *); -void pool_request(struct pool *, struct pool_request *); void pool_put(struct pool *, void *); int pool_reclaim(struct pool *); void pool_reclaim_all(void); Index: uvm/uvm_swap.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_swap.c,v retrieving revision 1.132 diff -u -p -r1.132 uvm_swap.c --- uvm/uvm_swap.c 23 Dec 2014 04:47:30 -0000 1.132 +++ uvm/uvm_swap.c 7 Jan 2015 03:37:52 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_swap.c,v 1.132 2014/12/23 04:47:30 tedu Exp $ */ +/* $OpenBSD: uvm_swap.c,v 1.131 2014/11/18 02:37:31 tedu Exp $ */ /* $NetBSD: uvm_swap.c,v 1.40 2000/11/17 11:39:39 mrg Exp $ */ /* @@ -201,6 +201,25 @@ struct vndbuf { struct pool vndxfer_pool; struct pool vndbuf_pool; +#define getvndxfer(vnx) do { \ + int s = splbio(); \ + vnx = pool_get(&vndxfer_pool, PR_WAITOK); \ + splx(s); \ +} while (0) + +#define putvndxfer(vnx) { \ + pool_put(&vndxfer_pool, (void *)(vnx)); \ +} + +#define getvndbuf(vbp) do { \ + int s = splbio(); \ + vbp = pool_get(&vndbuf_pool, PR_WAITOK); \ + splx(s); \ +} while (0) + +#define putvndbuf(vbp) { \ + pool_put(&vndbuf_pool, (void *)(vbp)); \ +} /* * local variables @@ -279,10 +298,8 @@ uvm_swap_init(void) /* allocate pools for structures used for swapping to files. */ pool_init(&vndxfer_pool, sizeof(struct vndxfer), 0, 0, 0, "swp vnx", NULL); - pool_setipl(&vndxfer_pool, IPL_BIO); pool_init(&vndbuf_pool, sizeof(struct vndbuf), 0, 0, 0, "swp vnd", NULL); - pool_setipl(&vndbuf_pool, IPL_BIO); /* Setup the initial swap partition */ swapmount(); @@ -1118,7 +1135,7 @@ sw_reg_strategy(struct swapdev *sdp, str * allocate a vndxfer head for this transfer and point it to * our buffer. */ - vnx = pool_get(&vndxfer_pool, PR_WAITOK); + getvndxfer(vnx); vnx->vx_flags = VX_BUSY; vnx->vx_error = 0; vnx->vx_pending = 0; @@ -1188,7 +1205,7 @@ sw_reg_strategy(struct swapdev *sdp, str * at the front of the nbp structure so that you can * cast pointers between the two structure easily. 
*/ - nbp = pool_get(&vndbuf_pool, PR_WAITOK); + getvndbuf(nbp); nbp->vb_buf.b_flags = bp->b_flags | B_CALL; nbp->vb_buf.b_bcount = sz; nbp->vb_buf.b_bufsize = sz; @@ -1233,7 +1250,7 @@ sw_reg_strategy(struct swapdev *sdp, str s = splbio(); if (vnx->vx_error != 0) { - pool_put(&vndbuf_pool, nbp); + putvndbuf(nbp); goto out; } vnx->vx_pending++; @@ -1262,7 +1279,7 @@ out: /* Arrive here at splbio */ bp->b_error = vnx->vx_error; bp->b_flags |= B_ERROR; } - pool_put(&vndxfer_pool, vnx); + putvndxfer(vnx); biodone(bp); } splx(s); @@ -1337,7 +1354,7 @@ sw_reg_iodone_internal(void *xvbp, void } /* kill vbp structure */ - pool_put(&vndbuf_pool, vbp); + putvndbuf(vbp); /* * wrap up this transaction if it has run to completion or, in @@ -1348,13 +1365,13 @@ sw_reg_iodone_internal(void *xvbp, void pbp->b_flags |= B_ERROR; pbp->b_error = vnx->vx_error; if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) { - pool_put(&vndxfer_pool, vnx); + putvndxfer(vnx); biodone(pbp); } } else if (pbp->b_resid == 0) { KASSERT(vnx->vx_pending == 0); if ((vnx->vx_flags & VX_BUSY) == 0) { - pool_put(&vndxfer_pool, vnx); + putvndxfer(vnx); biodone(pbp); } } @@ -1706,9 +1723,11 @@ uvm_swap_io(struct vm_page **pps, int st * now allocate a buf for the i/o. * [make sure we don't put the pagedaemon to sleep...] */ + s = splbio(); pflag = (async || curproc == uvm.pagedaemon_proc) ? PR_NOWAIT : PR_WAITOK; - bp = pool_get(&bufpool, pflag | PR_ZERO); + bp = pool_get(&bufpool, pflag); + splx(s); /* * if we failed to get a swapbuf, return "try again"
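
Note on the uvm_swap.c part above: the reinstated getvndxfer()/getvndbuf() macros raise splbio() around a PR_WAITOK pool_get(), while the matching put macros are only used from code that already runs at splbio() (the sw_reg_strategy() error path and the biodone callbacks). A minimal sketch of that asymmetry, not part of the diff; example_pool and struct example_xfer are hypothetical stand-ins for the vndxfer/vndbuf pools.

/*
 * Minimal sketch, not part of the diff: allocation raises splbio(),
 * the free side assumes the caller is already at splbio().  All
 * example_* names are hypothetical.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pool.h>

struct example_xfer {
	int	ex_pending;
};

extern struct pool example_pool;	/* assumed pool_init()ed elsewhere */

struct example_xfer *
example_getxfer(void)
{
	struct example_xfer *ex;
	int s;

	s = splbio();
	ex = pool_get(&example_pool, PR_WAITOK);
	splx(s);

	return (ex);
}

void
example_putxfer(struct example_xfer *ex)
{
	/* caller is expected to be at splbio() already */
	pool_put(&example_pool, ex);
}
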