Index: crypto/crypto.c =================================================================== RCS file: /cvs/src/sys/crypto/crypto.c,v retrieving revision 1.72 diff -u -p -r1.72 crypto.c --- crypto/crypto.c 23 Oct 2014 00:15:09 -0000 1.72 +++ crypto/crypto.c 7 Jan 2015 03:37:52 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: crypto.c,v 1.72 2014/10/23 00:15:09 dlg Exp $ */ +/* $OpenBSD: crypto.c,v 1.71 2014/10/23 00:11:48 dlg Exp $ */ /* * The author of this code is Angelos D. Keromytis (angelos@cis.upenn.edu) * @@ -453,16 +453,20 @@ void crypto_freereq(struct cryptop *crp) { struct cryptodesc *crd; + int s; if (crp == NULL) return; + s = splvm(); + while ((crd = crp->crp_desc) != NULL) { crp->crp_desc = crd->crd_next; pool_put(&cryptodesc_pool, crd); } pool_put(&cryptop_pool, crp); + splx(s); } /* @@ -473,14 +477,20 @@ crypto_getreq(int num) { struct cryptodesc *crd; struct cryptop *crp; + int s; + s = splvm(); + crp = pool_get(&cryptop_pool, PR_NOWAIT | PR_ZERO); - if (crp == NULL) + if (crp == NULL) { + splx(s); return NULL; + } while (num--) { crd = pool_get(&cryptodesc_pool, PR_NOWAIT | PR_ZERO); if (crd == NULL) { + splx(s); crypto_freereq(crp); return NULL; } @@ -489,6 +499,7 @@ crypto_getreq(int num) crp->crp_desc = crd; } + splx(s); return crp; } @@ -499,10 +510,8 @@ crypto_init(void) pool_init(&cryptop_pool, sizeof(struct cryptop), 0, 0, 0, "cryptop", NULL); - pool_setipl(&cryptop_pool, IPL_VM); pool_init(&cryptodesc_pool, sizeof(struct cryptodesc), 0, 0, 0, "cryptodesc", NULL); - pool_setipl(&cryptodesc_pool, IPL_VM); } /* Index: dev/pci/if_myx.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_myx.c,v retrieving revision 1.72 diff -u -p -r1.72 if_myx.c --- dev/pci/if_myx.c 22 Dec 2014 02:28:52 -0000 1.72 +++ dev/pci/if_myx.c 7 Jan 2015 03:37:52 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: if_myx.c,v 1.72 2014/12/22 02:28:52 tedu Exp $ */ +/* $OpenBSD: if_myx.c,v 1.61 2014/07/12 18:48:51 tedu Exp $ */ /* * Copyright (c) 2007 Reyk Floeter @@ -29,11 +29,10 @@ #include #include #include -#include #include +#include #include #include -#include #include #include @@ -47,8 +46,10 @@ #include #endif +#ifdef INET #include #include +#endif #include #include @@ -206,7 +207,7 @@ void myx_write_txd_tail(struct myx_soft int myx_load_buf(struct myx_softc *, struct myx_buf *, struct mbuf *); int myx_setlladdr(struct myx_softc *, u_int32_t, u_int8_t *); int myx_intr(void *); -void myx_rxeof(struct myx_softc *); +int myx_rxeof(struct myx_softc *); void myx_txeof(struct myx_softc *, u_int32_t); struct myx_buf * myx_buf_alloc(struct myx_softc *, bus_size_t, int, @@ -922,8 +923,10 @@ myx_ioctl(struct ifnet *ifp, u_long cmd, switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; +#ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) arp_ifinit(&sc->sc_ac, ifa); +#endif /* FALLTHROUGH */ case SIOCSIFFLAGS: @@ -1507,23 +1510,27 @@ myx_start(struct ifnet *ifp) return; for (;;) { - if (sc->sc_tx_free <= sc->sc_tx_nsegs || - (mb = myx_buf_get(&sc->sc_tx_buf_free)) == NULL) { + if (sc->sc_tx_free <= sc->sc_tx_nsegs) { SET(ifp->if_flags, IFF_OACTIVE); break; } - IFQ_DEQUEUE(&ifp->if_snd, m); - if (m == NULL) { - myx_buf_put(&sc->sc_tx_buf_free, mb); + IFQ_POLL(&ifp->if_snd, m); + if (m == NULL) + break; + + mb = myx_buf_get(&sc->sc_tx_buf_free); + if (mb == NULL) { + SET(ifp->if_flags, IFF_OACTIVE); break; } + IFQ_DEQUEUE(&ifp->if_snd, m); if (myx_load_buf(sc, mb, m) != 0) { m_freem(m); myx_buf_put(&sc->sc_tx_buf_free, mb); ifp->if_oerrors++; - continue; + break; } #if 
NBPFILTER > 0 @@ -1646,7 +1653,9 @@ myx_intr(void *arg) enum myx_state state = MYX_S_RUNNING; bus_dmamap_t map = sc->sc_sts_dma.mxm_map; u_int32_t data, link = 0xffffffff; + int refill = 0; u_int8_t valid = 0; + int i; mtx_enter(&sc->sc_sts_mtx); if (sc->sc_state == MYX_S_OFF) { @@ -1693,7 +1702,7 @@ myx_intr(void *arg) data = htobe32(3); if (valid & 0x1) { - myx_rxeof(sc); + refill |= myx_rxeof(sc); bus_space_write_raw_region_4(sc->sc_memt, sc->sc_memh, sc->sc_irqclaimoff, &data, sizeof(data)); @@ -1711,17 +1720,22 @@ myx_intr(void *arg) return (1); } - if (link != 0xffffffff) { - KERNEL_LOCK(); + KERNEL_LOCK(); + if (link != 0xffffffff) myx_link_state(sc, link); - KERNEL_UNLOCK(); - } if (ISSET(ifp->if_flags, IFF_OACTIVE)) { - KERNEL_LOCK(); CLR(ifp->if_flags, IFF_OACTIVE); myx_start(ifp); - KERNEL_UNLOCK(); + } + KERNEL_UNLOCK(); + + for (i = 0; i < 2; i++) { + if (ISSET(refill, 1 << i)) { + if (myx_rx_fill(sc, i) >= 0 && + myx_bufs_empty(&sc->sc_rx_buf_list[i])) + timeout_add(&sc->sc_refill, 0); + } } return (1); @@ -1764,10 +1778,12 @@ myx_txeof(struct myx_softc *sc, u_int32_ bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize, BUS_DMASYNC_POSTWRITE); + KERNEL_LOCK(); bus_dmamap_unload(sc->sc_dmat, map); + m_freem(m); ifp->if_opackets++; + KERNEL_UNLOCK(); - m_freem(m); myx_buf_put(&sc->sc_tx_buf_free, mb); } while (++sc->sc_tx_count != done_count); @@ -1778,15 +1794,15 @@ myx_txeof(struct myx_softc *sc, u_int32_ } } -void +int myx_rxeof(struct myx_softc *sc) { static const struct myx_intrq_desc zerodesc = { 0, 0 }; struct ifnet *ifp = &sc->sc_ac.ac_if; - struct mbuf_list ml = MBUF_LIST_INITIALIZER(); struct myx_buf *mb; struct mbuf *m; int ring; + int rings = 0; u_int rxfree[2] = { 0 , 0 }; u_int len; @@ -1810,14 +1826,22 @@ myx_rxeof(struct myx_softc *sc) bus_dmamap_sync(sc->sc_dmat, mb->mb_map, 0, mb->mb_map->dm_mapsize, BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(sc->sc_dmat, mb->mb_map); m = mb->mb_m; m->m_data += ETHER_ALIGN; m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = len; - ml_enqueue(&ml, m); + KERNEL_LOCK(); + bus_dmamap_unload(sc->sc_dmat, mb->mb_map); +#if NBPFILTER > 0 + if (ifp->if_bpf) + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_IN); +#endif + + ether_input_mbuf(ifp, m); + ifp->if_ipackets++; + KERNEL_UNLOCK(); myx_buf_put(&sc->sc_rx_buf_free[ring], mb); @@ -1835,24 +1859,10 @@ myx_rxeof(struct myx_softc *sc) if_rxr_put(&sc->sc_rx_ring[ring], rxfree[ring]); mtx_leave(&sc->sc_rx_ring_lock[ring].mrl_mtx); - if (myx_rx_fill(sc, ring) >= 0 && - myx_bufs_empty(&sc->sc_rx_buf_list[ring])) - timeout_add(&sc->sc_refill, 0); - } - - ifp->if_ipackets += ml_len(&ml); - - KERNEL_LOCK(); -#if NBPFILTER > 0 - if (ifp->if_bpf) { - MBUF_LIST_FOREACH(&ml, m) - bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_IN); + SET(rings, 1 << ring); } -#endif - while ((m = ml_dequeue(&ml)) != NULL) - ether_input_mbuf(ifp, m); - KERNEL_UNLOCK(); + return (rings); } void @@ -1871,57 +1881,13 @@ myx_rx_zero(struct myx_softc *sc, int ri } } -static inline int -myx_rx_fill_slots(struct myx_softc *sc, int ring, u_int slots) +int +myx_rx_fill(struct myx_softc *sc, int ring) { struct myx_rx_desc rxd; struct myx_buf *mb, *firstmb; u_int32_t offset = sc->sc_rx_ring_offset[ring]; - u_int idx, firstidx; - - firstmb = myx_buf_fill(sc, ring); - if (firstmb == NULL) - return (slots); - - myx_buf_put(&sc->sc_rx_buf_list[ring], firstmb); - - firstidx = sc->sc_rx_ring_idx[ring]; - idx = firstidx + 1; - idx %= sc->sc_rx_ring_count; - slots--; - - while (slots > 0 && (mb = myx_buf_fill(sc, ring)) != NULL) { - 
myx_buf_put(&sc->sc_rx_buf_list[ring], mb); - - rxd.rx_addr = htobe64(mb->mb_map->dm_segs[0].ds_addr); - myx_bus_space_write(sc->sc_memt, sc->sc_memh, - offset + idx * sizeof(rxd), &rxd, sizeof(rxd)); - - idx++; - idx %= sc->sc_rx_ring_count; - slots--; - } - - /* make sure the first descriptor is seen after the others */ - if (idx != firstidx + 1) { - bus_space_barrier(sc->sc_memt, sc->sc_memh, - offset, sizeof(rxd) * sc->sc_rx_ring_count, - BUS_SPACE_BARRIER_WRITE); - } - - rxd.rx_addr = htobe64(firstmb->mb_map->dm_segs[0].ds_addr); - myx_write(sc, offset + firstidx * sizeof(rxd), - &rxd, sizeof(rxd)); - - sc->sc_rx_ring_idx[ring] = idx; - - return (slots); -} - -int -myx_rx_fill(struct myx_softc *sc, int ring) -{ - u_int slots; + u_int idx, firstidx, slots; int rv = 1; if (!myx_ring_enter(&sc->sc_rx_ring_lock[ring])) @@ -1932,12 +1898,44 @@ myx_rx_fill(struct myx_softc *sc, int ri slots = if_rxr_get(&sc->sc_rx_ring[ring], sc->sc_rx_ring_count); mtx_leave(&sc->sc_rx_ring_lock[ring].mrl_mtx); - if (slots == 0) + if (slots-- == 0) + continue; + + firstmb = myx_buf_fill(sc, ring); + if (firstmb == NULL) continue; - slots = myx_rx_fill_slots(sc, ring, slots); rv = 0; + myx_buf_put(&sc->sc_rx_buf_list[ring], firstmb); + + firstidx = sc->sc_rx_ring_idx[ring]; + idx = firstidx + 1; + idx %= sc->sc_rx_ring_count; + while (slots > 0 && (mb = myx_buf_fill(sc, ring)) != NULL) { + myx_buf_put(&sc->sc_rx_buf_list[ring], mb); + + rxd.rx_addr = htobe64(mb->mb_map->dm_segs[0].ds_addr); + myx_bus_space_write(sc->sc_memt, sc->sc_memh, + offset + idx * sizeof(rxd), &rxd, sizeof(rxd)); + + idx++; + idx %= sc->sc_rx_ring_count; + slots--; + } + + /* make sure the first descriptor is seen after the others */ + if (idx != firstidx + 1) { + bus_space_barrier(sc->sc_memt, sc->sc_memh, + offset, sizeof(rxd) * sc->sc_rx_ring_count, + BUS_SPACE_BARRIER_WRITE); + } + + rxd.rx_addr = htobe64(firstmb->mb_map->dm_segs[0].ds_addr); + myx_write(sc, offset + firstidx * sizeof(rxd), + &rxd, sizeof(rxd)); + + sc->sc_rx_ring_idx[ring] = idx; mtx_enter(&sc->sc_rx_ring_lock[ring].mrl_mtx); if_rxr_put(&sc->sc_rx_ring[ring], slots); mtx_leave(&sc->sc_rx_ring_lock[ring].mrl_mtx); @@ -1954,7 +1952,9 @@ myx_buf_fill(struct myx_softc *sc, int r struct mbuf *m; int rv; + KERNEL_LOCK(); m = MCLGETI(NULL, M_DONTWAIT, NULL, sizes[ring]); + KERNEL_UNLOCK(); if (m == NULL) return (NULL); m->m_len = m->m_pkthdr.len = sizes[ring]; @@ -1963,7 +1963,9 @@ myx_buf_fill(struct myx_softc *sc, int r if (mb == NULL) goto mfree; + KERNEL_LOCK(); rv = bus_dmamap_load_mbuf(sc->sc_dmat, mb->mb_map, m, BUS_DMA_NOWAIT); + KERNEL_UNLOCK(); if (rv != 0) goto put; @@ -1976,7 +1978,9 @@ myx_buf_fill(struct myx_softc *sc, int r put: myx_buf_put(&sc->sc_rx_buf_free[ring], mb); mfree: + KERNEL_LOCK(); m_freem(m); + KERNEL_UNLOCK(); return (NULL); } @@ -2058,16 +2062,27 @@ myx_ring_lock_init(struct myx_ring_lock int myx_ring_enter(struct myx_ring_lock *mrl) { - return (atomic_inc_int_nv(&mrl->mrl_running) == 1); + int rv = 1; + + mtx_enter(&mrl->mrl_mtx); + if (++mrl->mrl_running > 1) + rv = 0; + mtx_leave(&mrl->mrl_mtx); + + return (rv); } int myx_ring_leave(struct myx_ring_lock *mrl) { - if (atomic_cas_uint(&mrl->mrl_running, 1, 0) == 1) - return (1); + int rv = 1; - mrl->mrl_running = 1; + mtx_enter(&mrl->mrl_mtx); + if (--mrl->mrl_running > 0) { + mrl->mrl_running = 1; + rv = 0; + } + mtx_leave(&mrl->mrl_mtx); - return (0); + return (rv); } Index: kern/subr_pool.c =================================================================== RCS file: 
/cvs/src/sys/kern/subr_pool.c,v retrieving revision 1.177 diff -u -p -r1.177 subr_pool.c --- kern/subr_pool.c 5 Jan 2015 23:54:18 -0000 1.177 +++ kern/subr_pool.c 7 Jan 2015 03:37:52 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: subr_pool.c,v 1.177 2015/01/05 23:54:18 dlg Exp $ */ +/* $OpenBSD: subr_pool.c,v 1.138 2014/07/10 13:34:39 tedu Exp $ */ /* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */ /*- @@ -33,15 +33,16 @@ #include #include +#include #include #include #include #include #include -#include #include #include +#include /* * Pool resource management utility. @@ -59,39 +60,28 @@ /* List of all pools */ SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head); -/* - * Every pool gets a unique serial number assigned to it. If this counter - * wraps, we're screwed, but we shouldn't create so many pools anyway. - */ -unsigned int pool_serial; -unsigned int pool_count; - -/* Lock the previous variables making up the global pool state */ -struct rwlock pool_lock = RWLOCK_INITIALIZER("pools"); - /* Private pool for page header structures */ struct pool phpool; struct pool_item_header { /* Page headers */ - TAILQ_ENTRY(pool_item_header) + LIST_ENTRY(pool_item_header) ph_pagelist; /* pool page list */ XSIMPLEQ_HEAD(,pool_item) ph_itemlist; /* chunk list for this page */ RB_ENTRY(pool_item_header) ph_node; /* Off-page page headers */ int ph_nmissing; /* # of chunks in use */ caddr_t ph_page; /* this page's address */ - u_long ph_magic; - int ph_tick; + caddr_t ph_colored; /* page's colored address */ + int ph_pagesize; + int ph_magic; }; -#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */ -#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT) struct pool_item { - u_long pi_magic; + u_int32_t pi_magic; + /* Other entries use only this list entry */ XSIMPLEQ_ENTRY(pool_item) pi_list; }; -#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic) #ifdef POOL_DEBUG int pool_debug = 1; @@ -102,54 +92,43 @@ int pool_debug = 0; #define POOL_NEEDS_CATCHUP(pp) \ ((pp)->pr_nitems < (pp)->pr_minitems) -#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0) - -struct pool_item_header * - pool_p_alloc(struct pool *, int, int *); -void pool_p_insert(struct pool *, struct pool_item_header *); -void pool_p_remove(struct pool *, struct pool_item_header *); -void pool_p_free(struct pool *, struct pool_item_header *); +/* + * Every pool gets a unique serial number assigned to it. If this counter + * wraps, we're screwed, but we shouldn't create so many pools anyway. + */ +unsigned int pool_serial; +int pool_catchup(struct pool *); +void pool_prime_page(struct pool *, caddr_t, struct pool_item_header *); void pool_update_curpage(struct pool *); -void *pool_do_get(struct pool *, int, int *); +void pool_swizzle_curpage(struct pool *); +void *pool_do_get(struct pool *, int); +void pool_do_put(struct pool *, void *); +void pr_rmpage(struct pool *, struct pool_item_header *, + struct pool_pagelist *); int pool_chk_page(struct pool *, struct pool_item_header *, int); int pool_chk(struct pool *); -void pool_get_done(void *, void *); -void pool_runqueue(struct pool *, int); +struct pool_item_header *pool_alloc_item_header(struct pool *, caddr_t , int); void *pool_allocator_alloc(struct pool *, int, int *); void pool_allocator_free(struct pool *, void *); /* - * The default pool allocator. 
- */ -void *pool_page_alloc(struct pool *, int, int *); -void pool_page_free(struct pool *, void *); - -/* - * safe for interrupts, name preserved for compat this is the default - * allocator + * XXX - quick hack. For pools with large items we want to use a special + * allocator. For now, instead of having the allocator figure out + * the allocation size from the pool (which can be done trivially + * with round_page(pr_itemsperpage * pr_size)) which would require + * lots of changes everywhere, we just create allocators for each + * size. We limit those to 128 pages. */ -struct pool_allocator pool_allocator_nointr = { - pool_page_alloc, - pool_page_free -}; - +#define POOL_LARGE_MAXPAGES 128 +struct pool_allocator pool_allocator_large[POOL_LARGE_MAXPAGES]; +struct pool_allocator pool_allocator_large_ni[POOL_LARGE_MAXPAGES]; void *pool_large_alloc(struct pool *, int, int *); void pool_large_free(struct pool *, void *); - -struct pool_allocator pool_allocator_large = { - pool_large_alloc, - pool_large_free -}; - void *pool_large_alloc_ni(struct pool *, int, int *); void pool_large_free_ni(struct pool *, void *); -struct pool_allocator pool_allocator_large_ni = { - pool_large_alloc_ni, - pool_large_free_ni -}; #ifdef DDB void pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...) @@ -160,19 +139,16 @@ void pool_print1(struct pool *, const c #define pool_sleep(pl) msleep(pl, &pl->pr_mtx, PSWP, pl->pr_wchan, 0) -static inline int +static __inline int phtree_compare(struct pool_item_header *a, struct pool_item_header *b) { - vaddr_t va = (vaddr_t)a->ph_page; - vaddr_t vb = (vaddr_t)b->ph_page; - - /* the compares in this order are important for the NFIND to work */ - if (vb < va) - return (-1); - if (vb > va) - return (1); - - return (0); + long diff = (vaddr_t)a->ph_page - (vaddr_t)b->ph_page; + if (diff < 0) + return -(-diff >= a->ph_pagesize); + else if (diff > 0) + return (diff >= b->ph_pagesize); + else + return (0); } RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare); @@ -181,29 +157,81 @@ RB_GENERATE(phtree, pool_item_header, ph /* * Return the pool page header based on page address. */ -static inline struct pool_item_header * +static __inline struct pool_item_header * pr_find_pagehead(struct pool *pp, void *v) { - struct pool_item_header *ph, key; + struct pool_item_header *ph, tmp; - if (POOL_INPGHDR(pp)) { + if ((pp->pr_roflags & PR_PHINPAGE) != 0) { caddr_t page; - page = (caddr_t)((vaddr_t)v & pp->pr_pgmask); + page = (caddr_t)((vaddr_t)v & pp->pr_alloc->pa_pagemask); return ((struct pool_item_header *)(page + pp->pr_phoffset)); } - key.ph_page = v; - ph = RB_NFIND(phtree, &pp->pr_phtree, &key); - if (ph == NULL) - panic("%s: %s: page header missing", __func__, pp->pr_wchan); - - KASSERT(ph->ph_page <= (caddr_t)v); - if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v) - panic("%s: %s: incorrect page", __func__, pp->pr_wchan); + /* + * The trick we're using in the tree compare function is to compare + * two elements equal when they overlap. We want to return the + * page header that belongs to the element just before this address. + * We don't want this element to compare equal to the next element, + * so the compare function takes the pagesize from the lower element. + * If this header is the lower, its pagesize is zero, so it can't + * overlap with the next header. But if the header we're looking for + * is lower, we'll use its pagesize and it will overlap and return + * equal. 
+ */ + tmp.ph_page = v; + tmp.ph_pagesize = 0; + ph = RB_FIND(phtree, &pp->pr_phtree, &tmp); + + if (ph) { + KASSERT(ph->ph_page <= (caddr_t)v); + KASSERT(ph->ph_page + ph->ph_pagesize > (caddr_t)v); + } + return ph; +} - return (ph); +/* + * Remove a page from the pool. + */ +void +pr_rmpage(struct pool *pp, struct pool_item_header *ph, + struct pool_pagelist *pq) +{ + + /* + * If the page was idle, decrement the idle page count. + */ + if (ph->ph_nmissing == 0) { +#ifdef DIAGNOSTIC + if (pp->pr_nidle == 0) + panic("pr_rmpage: nidle inconsistent"); + if (pp->pr_nitems < pp->pr_itemsperpage) + panic("pr_rmpage: nitems inconsistent"); +#endif + pp->pr_nidle--; + } + + pp->pr_nitems -= pp->pr_itemsperpage; + + /* + * Unlink a page from the pool and release it (or queue it for release). + */ + LIST_REMOVE(ph, ph_pagelist); + if ((pp->pr_roflags & PR_PHINPAGE) == 0) + RB_REMOVE(phtree, &pp->pr_phtree, ph); + pp->pr_npages--; + pp->pr_npagefree++; + pool_update_curpage(pp); + + if (pq) { + LIST_INSERT_HEAD(pq, ph, ph_pagelist); + } else { + pool_allocator_free(pp, ph->ph_page); + if ((pp->pr_roflags & PR_PHINPAGE) == 0) + pool_put(&phpool, ph); + } } /* @@ -216,12 +244,62 @@ void pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags, const char *wchan, struct pool_allocator *palloc) { - int off = 0; - unsigned int pgsize = PAGE_SIZE, items; + int off, slack; #ifdef DIAGNOSTIC struct pool *iter; - KASSERT(ioff == 0); + + SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) { + if (iter == pp) + panic("init pool already on list"); + } +#endif + +#ifdef MALLOC_DEBUG + if ((flags & PR_DEBUG) && (ioff != 0 || align != 0)) + flags &= ~PR_DEBUG; #endif + /* + * Check arguments and construct default values. + */ + if (palloc == NULL) { + if (size > PAGE_SIZE) { + int psize; + + /* + * XXX - should take align into account as well. + */ + if (size == round_page(size)) + psize = size / PAGE_SIZE; + else + psize = PAGE_SIZE / roundup(size % PAGE_SIZE, + 1024); + if (psize > POOL_LARGE_MAXPAGES) + psize = POOL_LARGE_MAXPAGES; + if (flags & PR_WAITOK) + palloc = &pool_allocator_large_ni[psize-1]; + else + palloc = &pool_allocator_large[psize-1]; + if (palloc->pa_pagesz == 0) { + palloc->pa_pagesz = psize * PAGE_SIZE; + if (flags & PR_WAITOK) { + palloc->pa_alloc = pool_large_alloc_ni; + palloc->pa_free = pool_large_free_ni; + } else { + palloc->pa_alloc = pool_large_alloc; + palloc->pa_free = pool_large_free; + } + } + } else { + palloc = &pool_allocator_nointr; + } + } + if (palloc->pa_pagesz == 0) { + palloc->pa_pagesz = PAGE_SIZE; + } + if (palloc->pa_pagemask == 0) { + palloc->pa_pagemask = ~(palloc->pa_pagesz - 1); + palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1; + } if (align == 0) align = ALIGN(1); @@ -230,53 +308,27 @@ pool_init(struct pool *pp, size_t size, size = sizeof(struct pool_item); size = roundup(size, align); - - if (palloc == NULL) { - while (size > pgsize) - pgsize <<= 1; - - if (pgsize > PAGE_SIZE) { - palloc = ISSET(flags, PR_WAITOK) ? - &pool_allocator_large_ni : &pool_allocator_large; - } else - palloc = &pool_allocator_nointr; - } else - pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE; - - items = pgsize / size; - - /* - * Decide whether to put the page header off page to avoid - * wasting too large a part of the page. Off-page page headers - * go into an RB tree, so we can match a returned item with - * its header based on the page address. 
- */ - if (pgsize - (size * items) > sizeof(struct pool_item_header)) { - off = pgsize - sizeof(struct pool_item_header); - } else if (sizeof(struct pool_item_header) * 2 >= size) { - off = pgsize - sizeof(struct pool_item_header); - items = off / size; - } - - KASSERT(items > 0); +#ifdef DIAGNOSTIC + if (size > palloc->pa_pagesz) + panic("pool_init: pool item size (%lu) too large", + (u_long)size); +#endif /* * Initialize the pool structure. */ - memset(pp, 0, sizeof(*pp)); - TAILQ_INIT(&pp->pr_emptypages); - TAILQ_INIT(&pp->pr_fullpages); - TAILQ_INIT(&pp->pr_partpages); + LIST_INIT(&pp->pr_emptypages); + LIST_INIT(&pp->pr_fullpages); + LIST_INIT(&pp->pr_partpages); pp->pr_curpage = NULL; pp->pr_npages = 0; pp->pr_minitems = 0; pp->pr_minpages = 0; pp->pr_maxpages = 8; + pp->pr_roflags = flags; + pp->pr_flags = 0; pp->pr_size = size; - pp->pr_pgsize = pgsize; - pp->pr_pgmask = ~0UL ^ (pgsize - 1); - pp->pr_phoffset = off; - pp->pr_itemsperpage = items; + pp->pr_align = align; pp->pr_wchan = wchan; pp->pr_alloc = palloc; pp->pr_nitems = 0; @@ -287,7 +339,47 @@ pool_init(struct pool *pp, size_t size, pp->pr_hardlimit_ratecap.tv_usec = 0; pp->pr_hardlimit_warning_last.tv_sec = 0; pp->pr_hardlimit_warning_last.tv_usec = 0; - RB_INIT(&pp->pr_phtree); + pp->pr_serial = ++pool_serial; + if (pool_serial == 0) + panic("pool_init: too much uptime"); + + /* + * Decide whether to put the page header off page to avoid + * wasting too large a part of the page. Off-page page headers + * go into an RB tree, so we can match a returned item with + * its header based on the page address. + * We use 1/16 of the page size as the threshold (XXX: tune) + */ + if (pp->pr_size < palloc->pa_pagesz/16 && pp->pr_size < PAGE_SIZE) { + /* Use the end of the page for the page header */ + pp->pr_roflags |= PR_PHINPAGE; + pp->pr_phoffset = off = palloc->pa_pagesz - + ALIGN(sizeof(struct pool_item_header)); + } else { + /* The page header will be taken from our page header pool */ + pp->pr_phoffset = 0; + off = palloc->pa_pagesz; + RB_INIT(&pp->pr_phtree); + } + + /* + * Alignment is to take place at `ioff' within the item. This means + * we must reserve up to `align - 1' bytes on the page to allow + * appropriate positioning of each item. + * + * Silently enforce `0 <= ioff < align'. + */ + pp->pr_itemoffset = ioff = ioff % align; + pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size; + KASSERT(pp->pr_itemsperpage != 0); + + /* + * Use the slack between the chunks and the page header + * for "cache coloring". + */ + slack = off - pp->pr_itemsperpage * pp->pr_size; + pp->pr_maxcolor = (slack / align) * align; + pp->pr_curcolor = 0; pp->pr_nget = 0; pp->pr_nfail = 0; @@ -299,37 +391,18 @@ pool_init(struct pool *pp, size_t size, pp->pr_ipl = -1; mtx_init(&pp->pr_mtx, IPL_NONE); - mtx_init(&pp->pr_requests_mtx, IPL_NONE); - TAILQ_INIT(&pp->pr_requests); if (phpool.pr_size == 0) { pool_init(&phpool, sizeof(struct pool_item_header), 0, 0, 0, "phpool", NULL); pool_setipl(&phpool, IPL_HIGH); - - /* make sure phpool wont "recurse" */ - KASSERT(POOL_INPGHDR(&phpool)); } /* pglistalloc/constraint parameters */ pp->pr_crange = &kp_dirty; /* Insert this into the list of all pools. 
*/ - rw_enter_write(&pool_lock); -#ifdef DIAGNOSTIC - SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) { - if (iter == pp) - panic("%s: pool %s already on list", __func__, wchan); - } -#endif - - pp->pr_serial = ++pool_serial; - if (pool_serial == 0) - panic("%s: too much uptime", __func__); - SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist); - pool_count++; - rw_exit_write(&pool_lock); } void @@ -337,7 +410,6 @@ pool_setipl(struct pool *pp, int ipl) { pp->pr_ipl = ipl; mtx_init(&pp->pr_mtx, ipl); - mtx_init(&pp->pr_requests_mtx, ipl); } /* @@ -349,14 +421,7 @@ pool_destroy(struct pool *pp) struct pool_item_header *ph; struct pool *prev, *iter; -#ifdef DIAGNOSTIC - if (pp->pr_nout != 0) - panic("%s: pool busy: still out: %u", __func__, pp->pr_nout); -#endif - /* Remove from global pool list */ - rw_enter_write(&pool_lock); - pool_count--; if (pp == SIMPLEQ_FIRST(&pool_head)) SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist); else { @@ -365,334 +430,399 @@ pool_destroy(struct pool *pp) if (iter == pp) { SIMPLEQ_REMOVE_AFTER(&pool_head, prev, pr_poollist); - break; + goto removed; } prev = iter; } +#ifdef DIAGNOSTIC + panic("destroyed pool not on list"); +#endif } - rw_exit_write(&pool_lock); +removed: +#ifdef DIAGNOSTIC + if (pp->pr_nout != 0) + panic("pool_destroy: pool busy: still out: %u", pp->pr_nout); +#endif /* Remove all pages */ - while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) { - mtx_enter(&pp->pr_mtx); - pool_p_remove(pp, ph); - mtx_leave(&pp->pr_mtx); - pool_p_free(pp, ph); - } - KASSERT(TAILQ_EMPTY(&pp->pr_fullpages)); - KASSERT(TAILQ_EMPTY(&pp->pr_partpages)); -} + while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) + pr_rmpage(pp, ph, NULL); + KASSERT(LIST_EMPTY(&pp->pr_fullpages)); + KASSERT(LIST_EMPTY(&pp->pr_partpages)); -void -pool_request_init(struct pool_request *pr, - void (*handler)(void *, void *), void *cookie) -{ - pr->pr_handler = handler; - pr->pr_cookie = cookie; - pr->pr_item = NULL; } -void -pool_request(struct pool *pp, struct pool_request *pr) +struct pool_item_header * +pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags) { - mtx_enter(&pp->pr_requests_mtx); - TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry); - pool_runqueue(pp, PR_NOWAIT); - mtx_leave(&pp->pr_requests_mtx); -} + struct pool_item_header *ph; -struct pool_get_memory { - struct mutex mtx; - void * volatile v; -}; + if ((pp->pr_roflags & PR_PHINPAGE) != 0) + ph = (struct pool_item_header *)(storage + pp->pr_phoffset); + else + ph = pool_get(&phpool, (flags & ~(PR_WAITOK | PR_ZERO)) | + PR_NOWAIT); +#ifdef DIAGNOSTIC + if (pool_debug && ph != NULL) + ph->ph_magic = poison_value(ph); +#endif + return (ph); +} /* - * Grab an item from the pool. 
+ * Grab an item from the pool; must be called at appropriate spl level */ void * pool_get(struct pool *pp, int flags) { - void *v = NULL; - int slowdown = 0; + void *v; KASSERT(flags & (PR_WAITOK | PR_NOWAIT)); + if ((flags & PR_WAITOK) != 0) { +#ifdef DIAGNOSTIC + assertwaitok(); + if (pool_debug == 2) + yield(); +#endif + if (!cold && pool_debug) { + KERNEL_UNLOCK(); + KERNEL_LOCK(); + } + } mtx_enter(&pp->pr_mtx); - if (pp->pr_nout >= pp->pr_hardlimit) { - if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL)) - goto fail; - } else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) { - if (ISSET(flags, PR_NOWAIT)) - goto fail; +#ifdef POOL_DEBUG + if (pp->pr_roflags & PR_DEBUGCHK) { + if (pool_chk(pp)) + panic("before pool_get"); } - mtx_leave(&pp->pr_mtx); - - if (slowdown && ISSET(flags, PR_WAITOK)) - yield(); - - if (v == NULL) { - struct pool_get_memory mem = { - MUTEX_INITIALIZER((pp->pr_ipl == -1) ? - IPL_NONE : pp->pr_ipl), NULL }; - struct pool_request pr; - - pool_request_init(&pr, pool_get_done, &mem); - pool_request(pp, &pr); - - mtx_enter(&mem.mtx); - while (mem.v == NULL) - msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0); - mtx_leave(&mem.mtx); - - v = mem.v; +#endif + v = pool_do_get(pp, flags); +#ifdef POOL_DEBUG + if (pp->pr_roflags & PR_DEBUGCHK) { + if (pool_chk(pp)) + panic("after pool_get"); } +#endif + if (v != NULL) + pp->pr_nget++; + mtx_leave(&pp->pr_mtx); + if (v == NULL) + return (v); - if (ISSET(flags, PR_ZERO)) + if (flags & PR_ZERO) memset(v, 0, pp->pr_size); return (v); - -fail: - pp->pr_nfail++; - mtx_leave(&pp->pr_mtx); - return (NULL); } -void -pool_get_done(void *xmem, void *v) -{ - struct pool_get_memory *mem = xmem; - - mtx_enter(&mem->mtx); - mem->v = v; - mtx_leave(&mem->mtx); - - wakeup_one(mem); -} - -void -pool_runqueue(struct pool *pp, int flags) +void * +pool_do_get(struct pool *pp, int flags) { - struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl); - struct pool_request *pr; - - MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx); - MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx); - - if (pp->pr_requesting++) - return; - - do { - pp->pr_requesting = 1; - - /* no TAILQ_JOIN? :( */ - while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) { - TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry); - TAILQ_INSERT_TAIL(&prl, pr, pr_entry); - } - if (TAILQ_EMPTY(&prl)) - continue; - - mtx_leave(&pp->pr_requests_mtx); - - mtx_enter(&pp->pr_mtx); - pr = TAILQ_FIRST(&prl); - while (pr != NULL) { - int slowdown = 0; - - if (pp->pr_nout >= pp->pr_hardlimit) - break; - - pr->pr_item = pool_do_get(pp, flags, &slowdown); - if (pr->pr_item == NULL) /* || slowdown ? */ - break; + struct pool_item *pi; + struct pool_item_header *ph; + void *v; + int slowdown = 0; - pr = TAILQ_NEXT(pr, pr_entry); - } - mtx_leave(&pp->pr_mtx); +#ifdef MALLOC_DEBUG + if (pp->pr_roflags & PR_DEBUG) { + void *addr; + + addr = NULL; + debug_malloc(pp->pr_size, M_DEBUG, + (flags & PR_WAITOK) ? M_WAITOK : M_NOWAIT, &addr); + return (addr); + } +#endif - while ((pr = TAILQ_FIRST(&prl)) != NULL && - pr->pr_item != NULL) { - TAILQ_REMOVE(&prl, pr, pr_entry); - (*pr->pr_handler)(pr->pr_cookie, pr->pr_item); +startover: + /* + * Check to see if we've reached the hard limit. If we have, + * and we can wait, then wait until an item has been returned to + * the pool. 
+ */ +#ifdef DIAGNOSTIC + if (pp->pr_nout > pp->pr_hardlimit) + panic("pool_do_get: %s: crossed hard limit", pp->pr_wchan); +#endif + if (pp->pr_nout == pp->pr_hardlimit) { + if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) { + /* + * XXX: A warning isn't logged in this case. Should + * it be? + */ + pp->pr_flags |= PR_WANTED; + pool_sleep(pp); + goto startover; } - mtx_enter(&pp->pr_requests_mtx); - } while (--pp->pr_requesting); + /* + * Log a message that the hard limit has been hit. + */ + if (pp->pr_hardlimit_warning != NULL && + ratecheck(&pp->pr_hardlimit_warning_last, + &pp->pr_hardlimit_ratecap)) + log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning); - /* no TAILQ_JOIN :( */ - while ((pr = TAILQ_FIRST(&prl)) != NULL) { - TAILQ_REMOVE(&prl, pr, pr_entry); - TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry); + pp->pr_nfail++; + return (NULL); } -} - -void * -pool_do_get(struct pool *pp, int flags, int *slowdown) -{ - struct pool_item *pi; - struct pool_item_header *ph; - - MUTEX_ASSERT_LOCKED(&pp->pr_mtx); - - if (pp->pr_ipl != -1) - splassert(pp->pr_ipl); + pool_swizzle_curpage(pp); /* - * Account for this item now to avoid races if we need to give up - * pr_mtx to allocate a page. + * The convention we use is that if `curpage' is not NULL, then + * it points at a non-empty bucket. In particular, `curpage' + * never points at a page header which has PR_PHINPAGE set and + * has no items in its bucket. */ - pp->pr_nout++; + if ((ph = pp->pr_curpage) == NULL) { +#ifdef DIAGNOSTIC + if (pp->pr_nitems != 0) { + printf("pool_do_get: %s: curpage NULL, nitems %u\n", + pp->pr_wchan, pp->pr_nitems); + panic("pool_do_get: nitems inconsistent"); + } +#endif - if (pp->pr_curpage == NULL) { - mtx_leave(&pp->pr_mtx); - ph = pool_p_alloc(pp, flags, slowdown); - mtx_enter(&pp->pr_mtx); + /* + * Call the back-end page allocator for more memory. + */ + v = pool_allocator_alloc(pp, flags, &slowdown); + if (v != NULL) + ph = pool_alloc_item_header(pp, v, flags); + + if (v == NULL || ph == NULL) { + if (v != NULL) + pool_allocator_free(pp, v); + + if ((flags & PR_WAITOK) == 0) { + pp->pr_nfail++; + return (NULL); + } - if (ph == NULL) { - pp->pr_nout--; - return (NULL); + /* + * Wait for items to be returned to this pool. + * + * XXX: maybe we should wake up once a second and + * try again? + */ + pp->pr_flags |= PR_WANTED; + pool_sleep(pp); + goto startover; } - pool_p_insert(pp, ph); - } + /* We have more memory; add it to the pool */ + pool_prime_page(pp, v, ph); + pp->pr_npagealloc++; - ph = pp->pr_curpage; - pi = XSIMPLEQ_FIRST(&ph->ph_itemlist); - if (__predict_false(pi == NULL)) - panic("%s: %s: page empty", __func__, pp->pr_wchan); - - if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) { - panic("%s: %s free list modified: " - "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx", - __func__, pp->pr_wchan, ph->ph_page, pi, - 0, pi->pi_magic, POOL_IMAGIC(ph, pi)); - } + if (slowdown && (flags & PR_WAITOK)) { + mtx_leave(&pp->pr_mtx); + yield(); + mtx_enter(&pp->pr_mtx); + } - XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list); + /* Start the allocation process over. 
*/ + goto startover; + } + if ((v = pi = XSIMPLEQ_FIRST(&ph->ph_itemlist)) == NULL) { + panic("pool_do_get: %s: page empty", pp->pr_wchan); + } +#ifdef DIAGNOSTIC + if (pp->pr_nitems == 0) { + printf("pool_do_get: %s: items on itemlist, nitems %u\n", + pp->pr_wchan, pp->pr_nitems); + panic("pool_do_get: nitems inconsistent"); + } +#endif #ifdef DIAGNOSTIC - if (pool_debug && POOL_PHPOISON(ph)) { + if (pi->pi_magic != poison_value(pi)) + panic("pool_do_get(%s): free list modified: " + "page %p; item addr %p; offset 0x%x=0x%x", + pp->pr_wchan, ph->ph_page, pi, 0, pi->pi_magic); + if (pool_debug && ph->ph_magic) { size_t pidx; uint32_t pval; if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), &pidx, &pval)) { int *ip = (int *)(pi + 1); - panic("%s: %s free list modified: " + panic("pool_do_get(%s): free list modified: " "page %p; item addr %p; offset 0x%zx=0x%x", - __func__, pp->pr_wchan, ph->ph_page, pi, + pp->pr_wchan, ph->ph_page, pi, pidx * sizeof(int), ip[pidx]); } } #endif /* DIAGNOSTIC */ - if (ph->ph_nmissing++ == 0) { + /* + * Remove from item list. + */ + XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list); + pp->pr_nitems--; + pp->pr_nout++; + if (ph->ph_nmissing == 0) { +#ifdef DIAGNOSTIC + if (pp->pr_nidle == 0) + panic("pool_do_get: nidle inconsistent"); +#endif + pp->pr_nidle--; + /* * This page was previously empty. Move it to the list of * partially-full pages. This page is already curpage. */ - TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist); - TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist); - - pp->pr_nidle--; + LIST_REMOVE(ph, ph_pagelist); + LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); } - - if (ph->ph_nmissing == pp->pr_itemsperpage) { + ph->ph_nmissing++; + if (XSIMPLEQ_EMPTY(&ph->ph_itemlist)) { +#ifdef DIAGNOSTIC + if (ph->ph_nmissing != pp->pr_itemsperpage) { + panic("pool_do_get: %s: nmissing inconsistent", + pp->pr_wchan); + } +#endif /* * This page is now full. Move it to the full list * and select a new current page. */ - TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist); - TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_pagelist); + LIST_REMOVE(ph, ph_pagelist); + LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist); pool_update_curpage(pp); } - pp->pr_nget++; - - return (pi); + /* + * If we have a low water mark and we are now below that low + * water mark, add more items to the pool. + */ + if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { + /* + * XXX: Should we log a warning? Should we set up a timeout + * to try again in a second or so? The latter could break + * a caller's assumptions about interrupt protection, etc. + */ + } + return (v); } /* - * Return resource to the pool. + * Return resource to the pool; must be called at appropriate spl level */ void pool_put(struct pool *pp, void *v) { + mtx_enter(&pp->pr_mtx); +#ifdef POOL_DEBUG + if (pp->pr_roflags & PR_DEBUGCHK) { + if (pool_chk(pp)) + panic("before pool_put"); + } +#endif + pool_do_put(pp, v); +#ifdef POOL_DEBUG + if (pp->pr_roflags & PR_DEBUGCHK) { + if (pool_chk(pp)) + panic("after pool_put"); + } +#endif + pp->pr_nput++; + mtx_leave(&pp->pr_mtx); +} + +/* + * Internal version of pool_put(). 
+ */ +void +pool_do_put(struct pool *pp, void *v) +{ struct pool_item *pi = v; - struct pool_item_header *ph, *freeph = NULL; - extern int ticks; + struct pool_item_header *ph; -#ifdef DIAGNOSTIC if (v == NULL) - panic("%s: NULL item", __func__); -#endif + panic("pool_put of NULL"); - mtx_enter(&pp->pr_mtx); +#ifdef MALLOC_DEBUG + if (pp->pr_roflags & PR_DEBUG) { + debug_free(v, M_DEBUG); + return; + } +#endif +#ifdef DIAGNOSTIC if (pp->pr_ipl != -1) splassert(pp->pr_ipl); - ph = pr_find_pagehead(pp, v); + if (pp->pr_nout == 0) { + printf("pool %s: putting with none out\n", + pp->pr_wchan); + panic("pool_do_put"); + } +#endif + if ((ph = pr_find_pagehead(pp, v)) == NULL) { + panic("pool_do_put: %s: page header missing", pp->pr_wchan); + } + + /* + * Return to item list. + */ #ifdef DIAGNOSTIC if (pool_debug) { struct pool_item *qi; - XSIMPLEQ_FOREACH(qi, &ph->ph_itemlist, pi_list) { - if (pi == qi) { - panic("%s: %s: double pool_put: %p", __func__, - pp->pr_wchan, pi); - } - } + XSIMPLEQ_FOREACH(qi, &ph->ph_itemlist, pi_list) + if (pi == qi) + panic("double pool_put: %p", pi); + } + pi->pi_magic = poison_value(pi); + if (ph->ph_magic) { + poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); } #endif /* DIAGNOSTIC */ - pi->pi_magic = POOL_IMAGIC(ph, pi); XSIMPLEQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); -#ifdef DIAGNOSTIC - if (POOL_PHPOISON(ph)) - poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); -#endif /* DIAGNOSTIC */ + ph->ph_nmissing--; + pp->pr_nitems++; + pp->pr_nout--; - if (ph->ph_nmissing-- == pp->pr_itemsperpage) { - /* - * The page was previously completely full, move it to the - * partially-full list. - */ - TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_pagelist); - TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist); + /* Cancel "pool empty" condition if it exists */ + if (pp->pr_curpage == NULL) + pp->pr_curpage = ph; + + if (pp->pr_flags & PR_WANTED) { + pp->pr_flags &= ~PR_WANTED; + wakeup(pp); } + /* + * If this page is now empty, do one of two things: + * + * (1) If we have more pages than the page high water mark, + * free the page back to the system. + * + * (2) Otherwise, move the page to the empty page list. + * + * Either way, select a new current page (so we use a partially-full + * page if one is available). + */ if (ph->ph_nmissing == 0) { - /* - * The page is now empty, so move it to the empty page list. - */ pp->pr_nidle++; - - ph->ph_tick = ticks; - TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist); - TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist); - pool_update_curpage(pp); + if (pp->pr_nidle > pp->pr_maxpages) { + pr_rmpage(pp, ph, NULL); + } else { + LIST_REMOVE(ph, ph_pagelist); + LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); + pool_update_curpage(pp); + } } - - pp->pr_nout--; - pp->pr_nput++; - - /* is it time to free a page? */ - if (pp->pr_nidle > pp->pr_maxpages && - (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL && - (ticks - ph->ph_tick) > hz) { - freeph = ph; - pool_p_remove(pp, freeph); + /* + * If the page was previously completely full, move it to the + * partially-full list. 
+ */ + else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) { + LIST_REMOVE(ph, ph_pagelist); + LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); } - mtx_leave(&pp->pr_mtx); - - if (freeph != NULL) - pool_p_free(pp, freeph); - - mtx_enter(&pp->pr_requests_mtx); - pool_runqueue(pp, PR_NOWAIT); - mtx_leave(&pp->pr_requests_mtx); } /* @@ -701,193 +831,200 @@ pool_put(struct pool *pp, void *v) int pool_prime(struct pool *pp, int n) { - struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl); struct pool_item_header *ph; + caddr_t cp; int newpages; + int slowdown; + mtx_enter(&pp->pr_mtx); newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; while (newpages-- > 0) { - int slowdown = 0; - - ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown); - if (ph == NULL) /* or slowdown? */ + cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown); + if (cp != NULL) + ph = pool_alloc_item_header(pp, cp, PR_NOWAIT); + if (cp == NULL || ph == NULL) { + if (cp != NULL) + pool_allocator_free(pp, cp); break; + } - TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist); + pool_prime_page(pp, cp, ph); + pp->pr_npagealloc++; + pp->pr_minpages++; } - mtx_enter(&pp->pr_mtx); - while ((ph = TAILQ_FIRST(&pl)) != NULL) { - TAILQ_REMOVE(&pl, ph, ph_pagelist); - pool_p_insert(pp, ph); - } - mtx_leave(&pp->pr_mtx); + if (pp->pr_minpages >= pp->pr_maxpages) + pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */ + mtx_leave(&pp->pr_mtx); return (0); } -struct pool_item_header * -pool_p_alloc(struct pool *pp, int flags, int *slowdown) +/* + * Add a page worth of items to the pool. + * + * Note, we must be called with the pool descriptor LOCKED. + */ +void +pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph) { - struct pool_item_header *ph; struct pool_item *pi; - caddr_t addr; - int n; - - MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx); - KASSERT(pp->pr_size >= sizeof(*pi)); - - addr = pool_allocator_alloc(pp, flags, slowdown); - if (addr == NULL) - return (NULL); - - if (POOL_INPGHDR(pp)) - ph = (struct pool_item_header *)(addr + pp->pr_phoffset); - else { - ph = pool_get(&phpool, flags); - if (ph == NULL) { - pool_allocator_free(pp, addr); - return (NULL); - } - } + caddr_t cp = storage; + unsigned int align = pp->pr_align; + unsigned int ioff = pp->pr_itemoffset; + int n; + /* + * Insert page header. + */ + LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); XSIMPLEQ_INIT(&ph->ph_itemlist); - ph->ph_page = addr; + ph->ph_page = storage; + ph->ph_pagesize = pp->pr_alloc->pa_pagesz; ph->ph_nmissing = 0; - arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic)); -#ifdef DIAGNOSTIC - /* use a bit in ph_magic to record if we poison page items */ - if (pool_debug) - SET(ph->ph_magic, POOL_MAGICBIT); - else - CLR(ph->ph_magic, POOL_MAGICBIT); -#endif /* DIAGNOSTIC */ + if ((pp->pr_roflags & PR_PHINPAGE) == 0) + RB_INSERT(phtree, &pp->pr_phtree, ph); - n = pp->pr_itemsperpage; - while (n--) { - pi = (struct pool_item *)addr; - pi->pi_magic = POOL_IMAGIC(ph, pi); - XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list); + pp->pr_nidle++; -#ifdef DIAGNOSTIC - if (POOL_PHPOISON(ph)) - poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); -#endif /* DIAGNOSTIC */ + /* + * Color this page. + */ + cp = (caddr_t)(cp + pp->pr_curcolor); + if ((pp->pr_curcolor += align) > pp->pr_maxcolor) + pp->pr_curcolor = 0; - addr += pp->pr_size; - } + /* + * Adjust storage to apply alignment to `pr_itemoffset' in each item. 
+ */ + if (ioff != 0) + cp = (caddr_t)(cp + (align - ioff)); + ph->ph_colored = cp; - return (ph); -} + /* + * Insert remaining chunks on the bucket list. + */ + n = pp->pr_itemsperpage; + pp->pr_nitems += n; -void -pool_p_free(struct pool *pp, struct pool_item_header *ph) -{ - struct pool_item *pi; + while (n--) { + pi = (struct pool_item *)cp; - MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx); - KASSERT(ph->ph_nmissing == 0); + KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0); - XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) { - if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) { - panic("%s: %s free list modified: " - "page %p; item addr %p; offset 0x%x=0x%lx", - __func__, pp->pr_wchan, ph->ph_page, pi, - 0, pi->pi_magic); - } + /* Insert on page list */ + XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list); #ifdef DIAGNOSTIC - if (POOL_PHPOISON(ph)) { - size_t pidx; - uint32_t pval; - if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), - &pidx, &pval)) { - int *ip = (int *)(pi + 1); - panic("%s: %s free list modified: " - "page %p; item addr %p; offset 0x%zx=0x%x", - __func__, pp->pr_wchan, ph->ph_page, pi, - pidx * sizeof(int), ip[pidx]); - } + pi->pi_magic = poison_value(pi); + if (ph->ph_magic) { + poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); } -#endif - } - - pool_allocator_free(pp, ph->ph_page); - - if (!POOL_INPGHDR(pp)) - pool_put(&phpool, ph); -} - -void -pool_p_insert(struct pool *pp, struct pool_item_header *ph) -{ - MUTEX_ASSERT_LOCKED(&pp->pr_mtx); +#endif /* DIAGNOSTIC */ + cp = (caddr_t)(cp + pp->pr_size); + } - /* If the pool was depleted, point at the new page */ + /* + * If the pool was depleted, point at the new page. + */ if (pp->pr_curpage == NULL) pp->pr_curpage = ph; - TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist); - if (!POOL_INPGHDR(pp)) - RB_INSERT(phtree, &pp->pr_phtree, ph); - - pp->pr_nitems += pp->pr_itemsperpage; - pp->pr_nidle++; - - pp->pr_npagealloc++; if (++pp->pr_npages > pp->pr_hiwat) pp->pr_hiwat = pp->pr_npages; } -void -pool_p_remove(struct pool *pp, struct pool_item_header *ph) +/* + * Used by pool_get() when nitems drops below the low water mark. This + * is used to catch up pr_nitems with the low water mark. + * + * Note we never wait for memory here, we let the caller decide what to do. + */ +int +pool_catchup(struct pool *pp) { - MUTEX_ASSERT_LOCKED(&pp->pr_mtx); - - pp->pr_npagefree++; - pp->pr_npages--; - pp->pr_nidle--; - pp->pr_nitems -= pp->pr_itemsperpage; + struct pool_item_header *ph; + caddr_t cp; + int error = 0; + int slowdown; - if (!POOL_INPGHDR(pp)) - RB_REMOVE(phtree, &pp->pr_phtree, ph); - TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist); + while (POOL_NEEDS_CATCHUP(pp)) { + /* + * Call the page back-end allocator for more memory. 
+ */ + cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown); + if (cp != NULL) + ph = pool_alloc_item_header(pp, cp, PR_NOWAIT); + if (cp == NULL || ph == NULL) { + if (cp != NULL) + pool_allocator_free(pp, cp); + error = ENOMEM; + break; + } + pool_prime_page(pp, cp, ph); + pp->pr_npagealloc++; + } - pool_update_curpage(pp); + return (error); } void pool_update_curpage(struct pool *pp) { - pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist); + + pp->pr_curpage = LIST_FIRST(&pp->pr_partpages); if (pp->pr_curpage == NULL) { - pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist); + pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages); } } void +pool_swizzle_curpage(struct pool *pp) +{ + struct pool_item_header *ph, *next; + + if ((ph = pp->pr_curpage) == NULL) + return; + if (arc4random_uniform(16) != 0) + return; + next = LIST_FIRST(&pp->pr_partpages); + if (next == ph) + next = LIST_NEXT(next, ph_pagelist); + if (next == NULL) { + next = LIST_FIRST(&pp->pr_emptypages); + if (next == ph) + next = LIST_NEXT(next, ph_pagelist); + } + if (next != NULL) + pp->pr_curpage = next; +} + +void pool_setlowat(struct pool *pp, int n) { - int prime = 0; - mtx_enter(&pp->pr_mtx); pp->pr_minitems = n; pp->pr_minpages = (n == 0) ? 0 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; - if (pp->pr_nitems < n) - prime = n - pp->pr_nitems; + mtx_enter(&pp->pr_mtx); + /* Make sure we're caught up with the newly-set low water mark. */ + if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { + /* + * XXX: Should we log a warning? Should we set up a timeout + * to try again in a second or so? The latter could break + * a caller's assumptions about interrupt protection, etc. + */ + } mtx_leave(&pp->pr_mtx); - - if (prime > 0) - pool_prime(pp, prime); } void pool_sethiwat(struct pool *pp, int n) { + pp->pr_maxpages = (n == 0) ? 0 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; @@ -928,16 +1065,20 @@ int pool_reclaim(struct pool *pp) { struct pool_item_header *ph, *phnext; - struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl); + struct pool_pagelist pq; + + LIST_INIT(&pq); mtx_enter(&pp->pr_mtx); - for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { - phnext = TAILQ_NEXT(ph, ph_pagelist); + for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { + phnext = LIST_NEXT(ph, ph_pagelist); /* Check our minimum page claim */ if (pp->pr_npages <= pp->pr_minpages) break; + KASSERT(ph->ph_nmissing == 0); + /* * If freeing this page would put us below * the low water mark, stop now. @@ -946,17 +1087,18 @@ pool_reclaim(struct pool *pp) pp->pr_minitems) break; - pool_p_remove(pp, ph); - TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist); + pr_rmpage(pp, ph, &pq); } mtx_leave(&pp->pr_mtx); - if (TAILQ_EMPTY(&pl)) + if (LIST_EMPTY(&pq)) return (0); - - while ((ph = TAILQ_FIRST(&pl)) != NULL) { - TAILQ_REMOVE(&pl, ph, ph_pagelist); - pool_p_free(pp, ph); + while ((ph = LIST_FIRST(&pq)) != NULL) { + LIST_REMOVE(ph, ph_pagelist); + pool_allocator_free(pp, ph->ph_page); + if (pp->pr_roflags & PR_PHINPAGE) + continue; + pool_put(&phpool, ph); } return (1); @@ -970,11 +1112,12 @@ void pool_reclaim_all(void) { struct pool *pp; + int s; - rw_enter_read(&pool_lock); + s = splhigh(); SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) pool_reclaim(pp); - rw_exit_read(&pool_lock); + splx(s); } #ifdef DDB @@ -997,17 +1140,21 @@ pool_print_pagelist(struct pool_pagelist int (*pr)(const char *, ...) 
__attribute__((__format__(__kprintf__,1,2)))) { struct pool_item_header *ph; +#ifdef DIAGNOSTIC struct pool_item *pi; +#endif - TAILQ_FOREACH(ph, pl, ph_pagelist) { + LIST_FOREACH(ph, pl, ph_pagelist) { (*pr)("\t\tpage %p, nmissing %d\n", ph->ph_page, ph->ph_nmissing); +#ifdef DIAGNOSTIC XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) { - if (pi->pi_magic != POOL_IMAGIC(ph, pi)) { - (*pr)("\t\t\titem %p, magic 0x%lx\n", + if (pi->pi_magic != poison_value(pi)) { + (*pr)("\t\t\titem %p, magic 0x%x\n", pi, pi->pi_magic); } } +#endif } } @@ -1025,7 +1172,9 @@ pool_print1(struct pool *pp, const char modif++; } - (*pr)("POOL %s: size %u\n", pp->pr_wchan, pp->pr_size); + (*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n", + pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset, + pp->pr_roflags); (*pr)("\talloc %p\n", pp->pr_alloc); (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); @@ -1040,13 +1189,13 @@ pool_print1(struct pool *pp, const char if (print_pagelist == 0) return; - if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) + if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) (*pr)("\n\tempty page list:\n"); pool_print_pagelist(&pp->pr_emptypages, pr); - if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL) + if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL) (*pr)("\n\tfull page list:\n"); pool_print_pagelist(&pp->pr_fullpages, pr); - if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL) + if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL) (*pr)("\n\tpartial-page list:\n"); pool_print_pagelist(&pp->pr_partpages, pr); @@ -1140,8 +1289,9 @@ pool_chk_page(struct pool *pp, struct po int n; const char *label = pp->pr_wchan; - page = (caddr_t)((u_long)ph & pp->pr_pgmask); - if (page != ph->ph_page && POOL_INPGHDR(pp)) { + page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask); + if (page != ph->ph_page && + (pp->pr_roflags & PR_PHINPAGE) != 0) { printf("%s: ", label); printf("pool(%p:%s): page inconsistency: page %p; " "at page head addr %p (p %p)\n", @@ -1152,17 +1302,17 @@ pool_chk_page(struct pool *pp, struct po for (pi = XSIMPLEQ_FIRST(&ph->ph_itemlist), n = 0; pi != NULL; pi = XSIMPLEQ_NEXT(&ph->ph_itemlist, pi, pi_list), n++) { - if (pi->pi_magic != POOL_IMAGIC(ph, pi)) { + +#ifdef DIAGNOSTIC + if (pi->pi_magic != poison_value(pi)) { printf("%s: ", label); - printf("pool(%p:%s): free list modified: " + printf("pool(%s): free list modified: " "page %p; item ordinal %d; addr %p " - "(p %p); offset 0x%x=0x%lx\n", - pp, pp->pr_wchan, ph->ph_page, n, pi, page, + "(p %p); offset 0x%x=0x%x\n", + pp->pr_wchan, ph->ph_page, n, pi, page, 0, pi->pi_magic); } - -#ifdef DIAGNOSTIC - if (POOL_PHPOISON(ph)) { + if (pool_debug && ph->ph_magic) { size_t pidx; uint32_t pval; if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), @@ -1176,8 +1326,8 @@ pool_chk_page(struct pool *pp, struct po } } #endif /* DIAGNOSTIC */ - - page = (caddr_t)((u_long)pi & pp->pr_pgmask); + page = + (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask); if (page == ph->ph_page) continue; @@ -1210,11 +1360,11 @@ pool_chk(struct pool *pp) struct pool_item_header *ph; int r = 0; - TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) + LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) r += pool_chk_page(pp, ph, pp->pr_itemsperpage); - TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) + LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) r += pool_chk_page(pp, ph, 0); - TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist) + LIST_FOREACH(ph, &pp->pr_partpages, 
ph_pagelist) r += pool_chk_page(pp, ph, -1); return (r); @@ -1233,8 +1383,8 @@ pool_walk(struct pool *pp, int full, caddr_t cp; int n; - TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { - cp = ph->ph_page; + LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { + cp = ph->ph_colored; n = ph->ph_nmissing; while (n--) { @@ -1243,8 +1393,8 @@ pool_walk(struct pool *pp, int full, } } - TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { - cp = ph->ph_page; + LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { + cp = ph->ph_colored; n = ph->ph_nmissing; do { @@ -1270,51 +1420,62 @@ pool_walk(struct pool *pp, int full, * kern.pool.name. - the name for pool#. */ int -sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp) +sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep) { struct kinfo_pool pi; struct pool *pp; - int rv = ENOENT; + size_t buflen = where != NULL ? *sizep : 0; + int npools = 0, s; + unsigned int lookfor; + size_t len; - switch (name[0]) { + switch (*name) { case KERN_POOL_NPOOLS: - if (namelen != 1) - return (ENOTDIR); - return (sysctl_rdint(oldp, oldlenp, NULL, pool_count)); - + if (namelen != 1 || buflen != sizeof(int)) + return (EINVAL); + lookfor = 0; + break; case KERN_POOL_NAME: + if (namelen != 2 || buflen < 1) + return (EINVAL); + lookfor = name[1]; + break; case KERN_POOL_POOL: + if (namelen != 2 || buflen != sizeof(pi)) + return (EINVAL); + lookfor = name[1]; break; default: - return (EOPNOTSUPP); + return (EINVAL); } - if (namelen != 2) - return (ENOTDIR); - - rw_enter_read(&pool_lock); + s = splvm(); SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { - if (name[1] == pp->pr_serial) + npools++; + if (lookfor == pp->pr_serial) break; } - if (pp == NULL) - goto done; + splx(s); - switch (name[0]) { + if (*name != KERN_POOL_NPOOLS && pp == NULL) + return (ENOENT); + + switch (*name) { + case KERN_POOL_NPOOLS: + return copyout(&npools, where, buflen); case KERN_POOL_NAME: - rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan); - break; + len = strlen(pp->pr_wchan) + 1; + if (*sizep < len) + return (ENOMEM); + *sizep = len; + return copyout(pp->pr_wchan, where, len); case KERN_POOL_POOL: memset(&pi, 0, sizeof(pi)); - - if (pp->pr_ipl != -1) - mtx_enter(&pp->pr_mtx); pi.pr_size = pp->pr_size; - pi.pr_pgsize = pp->pr_pgsize; + pi.pr_pgsize = pp->pr_alloc->pa_pagesz; pi.pr_itemsperpage = pp->pr_itemsperpage; - pi.pr_npages = pp->pr_npages; pi.pr_minpages = pp->pr_minpages; pi.pr_maxpages = pp->pr_maxpages; pi.pr_hardlimit = pp->pr_hardlimit; @@ -1327,41 +1488,54 @@ sysctl_dopool(int *name, u_int namelen, pi.pr_npagefree = pp->pr_npagefree; pi.pr_hiwat = pp->pr_hiwat; pi.pr_nidle = pp->pr_nidle; - if (pp->pr_ipl != -1) - mtx_leave(&pp->pr_mtx); - - rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi)); - break; + return copyout(&pi, where, buflen); } - -done: - rw_exit_read(&pool_lock); - - return (rv); + /* NOTREACHED */ + return (0); /* XXX - Stupid gcc */ } /* * Pool backend allocators. + * + * Each pool has a backend allocator that handles allocation, deallocation + */ +void *pool_page_alloc(struct pool *, int, int *); +void pool_page_free(struct pool *, void *); + +/* + * safe for interrupts, name preserved for compat this is the default + * allocator + */ +struct pool_allocator pool_allocator_nointr = { + pool_page_alloc, pool_page_free, 0, +}; + +/* + * XXX - we have at least three different resources for the same allocation + * and each resource can be depleted. First we have the ready elements in + * the pool. 
Then we have the resource (typically a vm_map) for this + * allocator, then we have physical memory. Waiting for any of these can + * be unnecessary when any other is freed, but the kernel doesn't support + * sleeping on multiple addresses, so we have to fake. The caller sleeps on + * the pool (so that we can be awakened when an item is returned to the pool), + * but we set PA_WANT on the allocator. When a page is returned to + * the allocator and PA_WANT is set pool_allocator_free will wakeup all + * sleeping pools belonging to this allocator. (XXX - thundering herd). + * We also wake up the allocator in case someone without a pool (malloc) + * is sleeping waiting for this allocator. */ void * pool_allocator_alloc(struct pool *pp, int flags, int *slowdown) { + int waitok = flags & PR_WAITOK; void *v; - KERNEL_LOCK(); - v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown); - KERNEL_UNLOCK(); - -#ifdef DIAGNOSTIC - if (v != NULL && POOL_INPGHDR(pp)) { - vaddr_t addr = (vaddr_t)v; - if ((addr & pp->pr_pgmask) != addr) { - panic("%s: %s page address %p isnt aligned to %u", - __func__, pp->pr_wchan, v, pp->pr_pgsize); - } - } -#endif + if (waitok) + mtx_leave(&pp->pr_mtx); + v = pp->pr_alloc->pa_alloc(pp, flags, slowdown); + if (waitok) + mtx_enter(&pp->pr_mtx); return (v); } @@ -1371,9 +1545,7 @@ pool_allocator_free(struct pool *pp, voi { struct pool_allocator *pa = pp->pr_alloc; - KERNEL_LOCK(); (*pa->pa_free)(pp, v); - KERNEL_UNLOCK(); } void * @@ -1381,34 +1553,31 @@ pool_page_alloc(struct pool *pp, int fla { struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; - kd.kd_waitok = ISSET(flags, PR_WAITOK); + kd.kd_waitok = (flags & PR_WAITOK); kd.kd_slowdown = slowdown; - return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd)); + return (km_alloc(PAGE_SIZE, &kv_page, pp->pr_crange, &kd)); } void pool_page_free(struct pool *pp, void *v) { - km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange); + km_free(v, PAGE_SIZE, &kv_page, pp->pr_crange); } void * pool_large_alloc(struct pool *pp, int flags, int *slowdown) { - struct kmem_va_mode kv = kv_intrsafe; struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; void *v; int s; - if (POOL_INPGHDR(pp)) - kv.kv_align = pp->pr_pgsize; - - kd.kd_waitok = ISSET(flags, PR_WAITOK); + kd.kd_waitok = (flags & PR_WAITOK); kd.kd_slowdown = slowdown; s = splvm(); - v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd); + v = km_alloc(pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange, + &kd); splx(s); return (v); @@ -1417,39 +1586,26 @@ pool_large_alloc(struct pool *pp, int fl void pool_large_free(struct pool *pp, void *v) { - struct kmem_va_mode kv = kv_intrsafe; int s; - if (POOL_INPGHDR(pp)) - kv.kv_align = pp->pr_pgsize; - s = splvm(); - km_free(v, pp->pr_pgsize, &kv, pp->pr_crange); + km_free(v, pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange); splx(s); } void * pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown) { - struct kmem_va_mode kv = kv_any; struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; - if (POOL_INPGHDR(pp)) - kv.kv_align = pp->pr_pgsize; - - kd.kd_waitok = ISSET(flags, PR_WAITOK); + kd.kd_waitok = (flags & PR_WAITOK); kd.kd_slowdown = slowdown; - return (km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd)); + return (km_alloc(pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange, &kd)); } void pool_large_free_ni(struct pool *pp, void *v) { - struct kmem_va_mode kv = kv_any; - - if (POOL_INPGHDR(pp)) - kv.kv_align = pp->pr_pgsize; - - km_free(v, pp->pr_pgsize, &kv, pp->pr_crange); + km_free(v, pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange); } 
Index: kern/uipc_mbuf.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v retrieving revision 1.199 diff -u -p -r1.199 uipc_mbuf.c --- kern/uipc_mbuf.c 11 Dec 2014 19:21:57 -0000 1.199 +++ kern/uipc_mbuf.c 7 Jan 2015 03:37:52 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_mbuf.c,v 1.199 2014/12/11 19:21:57 tedu Exp $ */ +/* $OpenBSD: uipc_mbuf.c,v 1.192 2014/07/13 15:52:38 tedu Exp $ */ /* $NetBSD: uipc_mbuf.c,v 1.15.4.1 1996/06/13 17:11:44 cgd Exp $ */ /* @@ -74,6 +74,7 @@ #include #include +#include #include #include #include @@ -95,7 +96,6 @@ #endif struct mbstat mbstat; /* mbuf stats */ -struct mutex mbstatmtx = MUTEX_INITIALIZER(IPL_NET); struct pool mbpool; /* mbuf pool */ struct pool mtagpool; @@ -118,8 +118,6 @@ int max_linkhdr; /* largest link-level int max_protohdr; /* largest protocol header */ int max_hdr; /* largest link+protocol header */ -struct mutex m_extref_mtx = MUTEX_INITIALIZER(IPL_NET); - void m_extfree(struct mbuf *); struct mbuf *m_copym0(struct mbuf *, int, int, int, int); void nmbclust_update(void); @@ -138,14 +136,11 @@ mbinit(void) int i; #if DIAGNOSTIC - if (mclsizes[0] != MCLBYTES) - panic("mbinit: the smallest cluster size != MCLBYTES"); if (mclsizes[nitems(mclsizes) - 1] != MAXMCLBYTES) panic("mbinit: the largest cluster size != MAXMCLBYTES"); #endif - pool_init(&mbpool, MSIZE, 0, 0, 0, "mbufpl", NULL); - pool_setipl(&mbpool, IPL_NET); + pool_init(&mbpool, MSIZE, 0, 0, 0, "mbpl", NULL); pool_set_constraints(&mbpool, &kp_dma_contig); pool_setlowat(&mbpool, mblowat); @@ -158,7 +153,6 @@ mbinit(void) mclsizes[i] >> 10); pool_init(&mclpools[i], mclsizes[i], 0, 0, 0, mclnames[i], NULL); - pool_setipl(&mclpools[i], IPL_NET); pool_set_constraints(&mclpools[i], &kp_dma_contig); pool_setlowat(&mclpools[i], mcllowat); } @@ -190,6 +184,21 @@ nmbclust_update(void) pool_sethiwat(&mbpool, nmbclust); } +void +m_reclaim(void *arg, int flags) +{ + struct domain *dp; + struct protosw *pr; + int s = splnet(); + + for (dp = domains; dp; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_drain) + (*pr->pr_drain)(); + mbstat.m_drain++; + splx(s); +} + /* * Space allocation routines. */ @@ -197,21 +206,20 @@ struct mbuf * m_get(int nowait, int type) { struct mbuf *m; + int s; + s = splnet(); m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT); - if (m == NULL) - return (NULL); - - mtx_enter(&mbstatmtx); - mbstat.m_mtypes[type]++; - mtx_leave(&mbstatmtx); - - m->m_type = type; - m->m_next = NULL; - m->m_nextpkt = NULL; - m->m_data = m->m_dat; - m->m_flags = 0; - + if (m) + mbstat.m_mtypes[type]++; + splx(s); + if (m) { + m->m_type = type; + m->m_next = NULL; + m->m_nextpkt = NULL; + m->m_data = m->m_dat; + m->m_flags = 0; + } return (m); } @@ -223,18 +231,25 @@ struct mbuf * m_gethdr(int nowait, int type) { struct mbuf *m; + int s; + s = splnet(); m = pool_get(&mbpool, nowait == M_WAIT ? 
PR_WAITOK : PR_NOWAIT); - if (m == NULL) - return (NULL); - - mtx_enter(&mbstatmtx); - mbstat.m_mtypes[type]++; - mtx_leave(&mbstatmtx); + if (m) + mbstat.m_mtypes[type]++; + splx(s); + if (m) { + m->m_type = type; - m->m_type = type; - - return (m_inithdr(m)); + /* keep in sync with m_inithdr */ + m->m_next = NULL; + m->m_nextpkt = NULL; + m->m_data = m->m_pktdat; + m->m_flags = M_PKTHDR; + memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr)); + m->m_pkthdr.pf.prio = IFQ_DEFPRIO; + } + return (m); } struct mbuf * @@ -284,6 +299,7 @@ m_clget(struct mbuf *m, int how, struct struct mbuf *m0 = NULL; struct pool *pp; caddr_t buf; + int s; pp = m_clpool(pktlen); #ifdef DIAGNOSTIC @@ -291,19 +307,23 @@ m_clget(struct mbuf *m, int how, struct panic("m_clget: request for %u byte cluster", pktlen); #endif + s = splnet(); if (m == NULL) { - m0 = m_gethdr(how, MT_DATA); - if (m0 == NULL) + MGETHDR(m0, M_DONTWAIT, MT_DATA); + if (m0 == NULL) { + splx(s); return (NULL); - + } m = m0; } buf = pool_get(pp, how == M_WAIT ? PR_WAITOK : PR_NOWAIT); if (buf == NULL) { if (m0) m_freem(m0); + splx(s); return (NULL); } + splx(s); MEXTADD(m, buf, pp->pr_size, M_EXTWR, m_extfree_pool, pp); return (m); @@ -312,18 +332,16 @@ m_clget(struct mbuf *m, int how, struct void m_extfree_pool(caddr_t buf, u_int size, void *pp) { + splassert(IPL_NET); pool_put(pp, buf); } struct mbuf * -m_free(struct mbuf *m) +m_free_unlocked(struct mbuf *m) { struct mbuf *n; - mtx_enter(&mbstatmtx); mbstat.m_mtypes[m->m_type]--; - mtx_leave(&mbstatmtx); - n = m->m_next; if (m->m_flags & M_ZEROIZE) { m_zero(m); @@ -335,68 +353,54 @@ m_free(struct mbuf *m) m_tag_delete_chain(m); if (m->m_flags & M_EXT) m_extfree(m); - pool_put(&mbpool, m); return (n); } -void -m_extref(struct mbuf *o, struct mbuf *n) +struct mbuf * +m_free(struct mbuf *m) { - int refs = MCLISREFERENCED(o); - - n->m_flags |= o->m_flags & (M_EXT|M_EXTWR); + struct mbuf *n; + int s; - if (refs) - mtx_enter(&m_extref_mtx); - n->m_ext.ext_nextref = o->m_ext.ext_nextref; - n->m_ext.ext_prevref = o; - o->m_ext.ext_nextref = n; - n->m_ext.ext_nextref->m_ext.ext_prevref = n; - if (refs) - mtx_leave(&m_extref_mtx); + s = splnet(); + n = m_free_unlocked(m); + splx(s); - MCLREFDEBUGN((n), __FILE__, __LINE__); + return (n); } -static inline u_int -m_extunref(struct mbuf *m) +void +m_extfree(struct mbuf *m) { - int refs = 1; - - if (!MCLISREFERENCED(m)) - return (0); - - mtx_enter(&m_extref_mtx); if (MCLISREFERENCED(m)) { m->m_ext.ext_nextref->m_ext.ext_prevref = m->m_ext.ext_prevref; m->m_ext.ext_prevref->m_ext.ext_nextref = m->m_ext.ext_nextref; - } else - refs = 0; - mtx_leave(&m_extref_mtx); - - return (refs); -} - -void -m_extfree(struct mbuf *m) -{ - if (m_extunref(m) == 0) { + } else if (m->m_ext.ext_free) (*(m->m_ext.ext_free))(m->m_ext.ext_buf, m->m_ext.ext_size, m->m_ext.ext_arg); - } - + else + panic("unknown type of extension buffer"); + m->m_ext.ext_size = 0; m->m_flags &= ~(M_EXT|M_EXTWR); } void m_freem(struct mbuf *m) { - while (m != NULL) - m = m_free(m); + struct mbuf *n; + int s; + + if (m == NULL) + return; + s = splnet(); + do { + n = m_free_unlocked(m); + } while ((m = n) != NULL); + splx(s); } /* @@ -432,8 +436,12 @@ m_defrag(struct mbuf *m, int how) /* free chain behind and possible ext buf on the first mbuf */ m_freem(m->m_next); m->m_next = NULL; - if (m->m_flags & M_EXT) + + if (m->m_flags & M_EXT) { + int s = splnet(); m_extfree(m); + splx(s); + } /* * Bounce copy mbuf over to the original mbuf and set everything up. 
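
Note on the uipc_mbuf.c hunks above: m_get(), m_gethdr(), m_clget(), m_free() and m_freem() take splnet() internally again, but the reintroduced m_free_unlocked() (and a bare m_extfree() call, as in the m_defrag() hunk) leave that to the caller. A minimal sketch of caller-side use follows; example_recycle() is a hypothetical helper, not part of the diff.

/*
 * Minimal sketch, not part of the diff: m_free_unlocked() expects
 * the caller to be at splnet() already.  example_recycle() is a
 * hypothetical helper.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

struct mbuf *
example_recycle(struct mbuf *m)
{
	struct mbuf *n;
	int s;

	s = splnet();			/* m_free_unlocked() trusts the caller */
	n = m_free_unlocked(m);		/* frees m, returns m->m_next */
	splx(s);

	return (n);
}
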
@@ -606,7 +614,7 @@ m_copydata(struct mbuf *m, int off, int if (m == NULL) panic("m_copydata: null mbuf"); count = min(m->m_len - off, len); - memmove(cp, mtod(m, caddr_t) + off, count); + bcopy(mtod(m, caddr_t) + off, cp, count); len -= count; cp += count; off = 0; @@ -659,7 +667,7 @@ m_copyback(struct mbuf *m0, int off, int m->m_len += min(len - (m->m_len - off), M_TRAILINGSPACE(m)); mlen = min(m->m_len - off, len); - memmove(mtod(m, caddr_t) + off, cp, mlen); + bcopy(cp, mtod(m, caddr_t) + off, (size_t)mlen); cp += mlen; len -= mlen; totlen += mlen + off; @@ -712,8 +720,8 @@ m_cat(struct mbuf *m, struct mbuf *n) return; } /* splat the data from one into the other */ - memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t), - n->m_len); + bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, + (u_int)n->m_len); m->m_len += n->m_len; n = m_free(n); } @@ -841,8 +849,8 @@ m_pullup(struct mbuf *n, int len) do { count = min(len, n->m_len); - memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t), - count); + bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, + (unsigned)count); len -= count; m->m_len += count; n->m_len -= count; @@ -1012,7 +1020,7 @@ extpacket: MCLADDREFERENCE(m, n); n->m_data = m->m_data + len; } else { - memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain); + bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); } n->m_len = remain; m->m_len = len; @@ -1233,156 +1241,6 @@ m_print(void *v, m->m_ext.ext_free, m->m_ext.ext_arg); (*pr)("m_ext.ext_nextref: %p\tm_ext.ext_prevref: %p\n", m->m_ext.ext_nextref, m->m_ext.ext_prevref); - } } #endif - -/* - * mbuf lists - */ - -void ml_join(struct mbuf_list *, struct mbuf_list *); - -void -ml_init(struct mbuf_list *ml) -{ - ml->ml_head = ml->ml_tail = NULL; - ml->ml_len = 0; -} - -void -ml_enqueue(struct mbuf_list *ml, struct mbuf *m) -{ - if (ml->ml_tail == NULL) - ml->ml_head = ml->ml_tail = m; - else { - ml->ml_tail->m_nextpkt = m; - ml->ml_tail = m; - } - - m->m_nextpkt = NULL; - ml->ml_len++; -} - -void -ml_join(struct mbuf_list *mla, struct mbuf_list *mlb) -{ - if (mla->ml_tail == NULL) - *mla = *mlb; - else if (mlb->ml_tail != NULL) { - mla->ml_tail->m_nextpkt = mlb->ml_head; - mla->ml_tail = mlb->ml_tail; - mla->ml_len += mlb->ml_len; - - ml_init(mlb); - } -} - -struct mbuf * -ml_dequeue(struct mbuf_list *ml) -{ - struct mbuf *m; - - m = ml->ml_head; - if (m != NULL) { - ml->ml_head = m->m_nextpkt; - if (ml->ml_head == NULL) - ml->ml_tail = NULL; - - m->m_nextpkt = NULL; - ml->ml_len--; - } - - return (m); -} - -struct mbuf * -ml_dechain(struct mbuf_list *ml) -{ - struct mbuf *m0; - - m0 = ml->ml_head; - - ml_init(ml); - - return (m0); -} - -/* - * mbuf queues - */ - -void -mq_init(struct mbuf_queue *mq, u_int maxlen, int ipl) -{ - mtx_init(&mq->mq_mtx, ipl); - ml_init(&mq->mq_list); - mq->mq_maxlen = maxlen; -} - -int -mq_enqueue(struct mbuf_queue *mq, struct mbuf *m) -{ - int dropped = 0; - - mtx_enter(&mq->mq_mtx); - if (mq_len(mq) < mq->mq_maxlen) - ml_enqueue(&mq->mq_list, m); - else { - mq->mq_drops++; - dropped = 1; - } - mtx_leave(&mq->mq_mtx); - - if (dropped) - m_freem(m); - - return (dropped); -} - -struct mbuf * -mq_dequeue(struct mbuf_queue *mq) -{ - struct mbuf *m; - - mtx_enter(&mq->mq_mtx); - m = ml_dequeue(&mq->mq_list); - mtx_leave(&mq->mq_mtx); - - return (m); -} - -int -mq_enlist(struct mbuf_queue *mq, struct mbuf_list *ml) -{ - int full; - - mtx_enter(&mq->mq_mtx); - ml_join(&mq->mq_list, ml); - full = mq_len(mq) >= mq->mq_maxlen; - mtx_leave(&mq->mq_mtx); - - return (full); -} - -void -mq_delist(struct 
mbuf_queue *mq, struct mbuf_list *ml) -{ - mtx_enter(&mq->mq_mtx); - *ml = mq->mq_list; - ml_init(&mq->mq_list); - mtx_leave(&mq->mq_mtx); -} - -struct mbuf * -mq_dechain(struct mbuf_queue *mq) -{ - struct mbuf *m0; - - mtx_enter(&mq->mq_mtx); - m0 = ml_dechain(&mq->mq_list); - mtx_leave(&mq->mq_mtx); - - return (m0); -} Index: sys/mbuf.h =================================================================== RCS file: /cvs/src/sys/sys/mbuf.h,v retrieving revision 1.183 diff -u -p -r1.183 mbuf.h --- sys/mbuf.h 3 Oct 2014 01:02:47 -0000 1.183 +++ sys/mbuf.h 7 Jan 2015 03:37:52 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: mbuf.h,v 1.183 2014/10/03 01:02:47 dlg Exp $ */ +/* $OpenBSD: mbuf.h,v 1.180 2014/07/13 09:52:48 dlg Exp $ */ /* $NetBSD: mbuf.h,v 1.19 1996/02/09 18:25:14 christos Exp $ */ /* @@ -36,6 +36,7 @@ #define _SYS_MBUF_H_ #include +#include #include /* @@ -245,6 +246,8 @@ struct mbuf { /* * Macros for tracking external storage associated with an mbuf. + * + * Note: add and delete reference must be called at splnet(). */ #ifdef DEBUG #define MCLREFDEBUGN(m, file, line) do { \ @@ -262,7 +265,16 @@ struct mbuf { #define MCLISREFERENCED(m) ((m)->m_ext.ext_nextref != (m)) -#define MCLADDREFERENCE(o, n) m_extref((o), (n)) +#define MCLADDREFERENCE(o, n) do { \ + int ms = splnet(); \ + (n)->m_flags |= ((o)->m_flags & (M_EXT|M_EXTWR)); \ + (n)->m_ext.ext_nextref = (o)->m_ext.ext_nextref; \ + (n)->m_ext.ext_prevref = (o); \ + (o)->m_ext.ext_nextref = (n); \ + (n)->m_ext.ext_nextref->m_ext.ext_prevref = (n); \ + splx(ms); \ + MCLREFDEBUGN((n), __FILE__, __LINE__); \ + } while (/* CONSTCOND */ 0) #define MCLINITREFERENCE(m) do { \ (m)->m_ext.ext_prevref = (m); \ @@ -400,6 +412,7 @@ void mbinit(void); struct mbuf *m_copym2(struct mbuf *, int, int, int); struct mbuf *m_copym(struct mbuf *, int, int, int); struct mbuf *m_free(struct mbuf *); +struct mbuf *m_free_unlocked(struct mbuf *); struct mbuf *m_get(int, int); struct mbuf *m_getclr(int, int); struct mbuf *m_gethdr(int, int); @@ -414,7 +427,6 @@ struct mbuf *m_getptr(struct mbuf *, in int m_leadingspace(struct mbuf *); int m_trailingspace(struct mbuf *); struct mbuf *m_clget(struct mbuf *, int, struct ifnet *, u_int); -void m_extref(struct mbuf *, struct mbuf *); void m_extfree_pool(caddr_t, u_int, void *); void m_adj(struct mbuf *, int); int m_copyback(struct mbuf *, int, int, const void *, int); @@ -467,57 +479,6 @@ struct m_tag *m_tag_next(struct mbuf *, * has payload larger than the value below. 
*/ #define PACKET_TAG_MAXSIZE 52 - -/* - * mbuf lists - */ - -#include - -struct mbuf_list { - struct mbuf *ml_head; - struct mbuf *ml_tail; - u_int ml_len; -}; - -#define MBUF_LIST_INITIALIZER() { NULL, NULL, 0 } - -void ml_init(struct mbuf_list *); -void ml_enqueue(struct mbuf_list *, struct mbuf *); -struct mbuf * ml_dequeue(struct mbuf_list *); -struct mbuf * ml_dechain(struct mbuf_list *); - -#define ml_len(_ml) ((_ml)->ml_len) -#define ml_empty(_ml) ((_ml)->ml_len == 0) - -#define MBUF_LIST_FOREACH(_ml, _m) \ - for ((_m) = (_ml)->ml_head; (_m) != NULL; (_m) = (_m)->m_nextpkt) - -/* - * mbuf queues - */ - -struct mbuf_queue { - struct mutex mq_mtx; - struct mbuf_list mq_list; - u_int mq_maxlen; - u_int mq_drops; -}; - -#define MBUF_QUEUE_INITIALIZER(_maxlen, _ipl) \ - { MUTEX_INITIALIZER(_ipl), MBUF_LIST_INITIALIZER(), (_maxlen), 0 } - -void mq_init(struct mbuf_queue *, u_int, int); -int mq_enqueue(struct mbuf_queue *, struct mbuf *); -struct mbuf * mq_dequeue(struct mbuf_queue *); -int mq_enlist(struct mbuf_queue *, struct mbuf_list *); -void mq_delist(struct mbuf_queue *, struct mbuf_list *); -struct mbuf * mq_dechain(struct mbuf_queue *); - -#define mq_len(_mq) ml_len(&(_mq)->mq_list) -#define mq_empty(_mq) ml_empty(&(_mq)->mq_list) -#define mq_drops(_mq) ((_mq)->mq_drops) -#define mq_set_maxlen(_mq, _l) ((_mq)->mq_maxlen = (_l)) #endif /* _KERNEL */ #endif /* _SYS_MBUF_H_ */ Index: sys/pool.h =================================================================== RCS file: /cvs/src/sys/sys/pool.h,v retrieving revision 1.56 diff -u -p -r1.56 pool.h --- sys/pool.h 22 Dec 2014 02:59:54 -0000 1.56 +++ sys/pool.h 7 Jan 2015 03:37:52 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: pool.h,v 1.56 2014/12/22 02:59:54 tedu Exp $ */ +/* $OpenBSD: pool.h,v 1.47 2014/07/02 00:23:36 dlg Exp $ */ /* $NetBSD: pool.h,v 1.27 2001/06/06 22:00:17 rafal Exp $ */ /*- @@ -73,16 +73,16 @@ struct kinfo_pool { #include struct pool; -struct pool_request; -TAILQ_HEAD(pool_requests, pool_request); struct pool_allocator { void *(*pa_alloc)(struct pool *, int, int *); void (*pa_free)(struct pool *, void *); int pa_pagesz; + int pa_pagemask; + int pa_pageshift; }; -TAILQ_HEAD(pool_pagelist, pool_item_header); +LIST_HEAD(pool_pagelist, pool_item_header); struct pool { struct mutex pr_mtx; @@ -97,6 +97,8 @@ struct pool { struct pool_item_header * pr_curpage; unsigned int pr_size; /* Size of item */ + unsigned int pr_align; /* Requested alignment, must be 2^n */ + unsigned int pr_itemoffset; /* Align this offset in item */ unsigned int pr_minitems; /* minimum # of items to keep */ unsigned int pr_minpages; /* same in page units */ unsigned int pr_maxpages; /* maximum # of idle pages to keep */ @@ -108,16 +110,19 @@ struct pool { unsigned int pr_hardlimit; /* hard limit to number of allocated items */ unsigned int pr_serial; /* unique serial number of the pool */ - unsigned int pr_pgsize; /* Size of a "page" */ - vaddr_t pr_pgmask; /* Mask with an item to get a page */ struct pool_allocator * pr_alloc; /* backend allocator */ const char * pr_wchan; /* tsleep(9) identifier */ + unsigned int pr_flags; /* r/w flags */ + unsigned int pr_roflags; /* r/o flags */ #define PR_WAITOK 0x0001 /* M_WAITOK */ #define PR_NOWAIT 0x0002 /* M_NOWAIT */ #define PR_LIMITFAIL 0x0004 /* M_CANFAIL */ #define PR_ZERO 0x0008 /* M_ZERO */ #define PR_WANTED 0x0100 +#define PR_PHINPAGE 0x0200 +#define PR_LOGGING 0x0400 +#define PR_DEBUG 0x0800 #define PR_DEBUGCHK 0x1000 int pr_ipl; @@ -125,6 +130,8 @@ struct pool { RB_HEAD(phtree, pool_item_header) 
pr_phtree; + int pr_maxcolor; /* Cache colouring */ + int pr_curcolor; int pr_phoffset; /* Offset in page of page header */ /* @@ -136,14 +143,6 @@ struct pool { struct timeval pr_hardlimit_warning_last; /* - * pool item requests queue - */ - struct mutex pr_requests_mtx; - struct pool_requests - pr_requests; - unsigned int pr_requesting; - - /* * Instrumentation */ unsigned long pr_nget; /* # of successful requests */ @@ -165,13 +164,7 @@ struct pool { extern struct pool_allocator pool_allocator_nointr; -struct pool_request { - TAILQ_ENTRY(pool_request) pr_entry; - void (*pr_handler)(void *, void *); - void *pr_cookie; - void *pr_item; -}; - +/* these functions are not locked */ void pool_init(struct pool *, size_t, u_int, u_int, int, const char *, struct pool_allocator *); void pool_destroy(struct pool *); @@ -183,10 +176,8 @@ struct uvm_constraint_range; /* XXX */ void pool_set_constraints(struct pool *, const struct kmem_pa_mode *mode); +/* these functions are locked */ void *pool_get(struct pool *, int) __malloc; -void pool_request_init(struct pool_request *, - void (*)(void *, void *), void *); -void pool_request(struct pool *, struct pool_request *); void pool_put(struct pool *, void *); int pool_reclaim(struct pool *); void pool_reclaim_all(void); Index: uvm/uvm_swap.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_swap.c,v retrieving revision 1.132 diff -u -p -r1.132 uvm_swap.c --- uvm/uvm_swap.c 23 Dec 2014 04:47:30 -0000 1.132 +++ uvm/uvm_swap.c 7 Jan 2015 03:37:52 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_swap.c,v 1.132 2014/12/23 04:47:30 tedu Exp $ */ +/* $OpenBSD: uvm_swap.c,v 1.131 2014/11/18 02:37:31 tedu Exp $ */ /* $NetBSD: uvm_swap.c,v 1.40 2000/11/17 11:39:39 mrg Exp $ */ /* @@ -201,6 +201,25 @@ struct vndbuf { struct pool vndxfer_pool; struct pool vndbuf_pool; +#define getvndxfer(vnx) do { \ + int s = splbio(); \ + vnx = pool_get(&vndxfer_pool, PR_WAITOK); \ + splx(s); \ +} while (0) + +#define putvndxfer(vnx) { \ + pool_put(&vndxfer_pool, (void *)(vnx)); \ +} + +#define getvndbuf(vbp) do { \ + int s = splbio(); \ + vbp = pool_get(&vndbuf_pool, PR_WAITOK); \ + splx(s); \ +} while (0) + +#define putvndbuf(vbp) { \ + pool_put(&vndbuf_pool, (void *)(vbp)); \ +} /* * local variables @@ -279,10 +298,8 @@ uvm_swap_init(void) /* allocate pools for structures used for swapping to files. */ pool_init(&vndxfer_pool, sizeof(struct vndxfer), 0, 0, 0, "swp vnx", NULL); - pool_setipl(&vndxfer_pool, IPL_BIO); pool_init(&vndbuf_pool, sizeof(struct vndbuf), 0, 0, 0, "swp vnd", NULL); - pool_setipl(&vndbuf_pool, IPL_BIO); /* Setup the initial swap partition */ swapmount(); @@ -1118,7 +1135,7 @@ sw_reg_strategy(struct swapdev *sdp, str * allocate a vndxfer head for this transfer and point it to * our buffer. */ - vnx = pool_get(&vndxfer_pool, PR_WAITOK); + getvndxfer(vnx); vnx->vx_flags = VX_BUSY; vnx->vx_error = 0; vnx->vx_pending = 0; @@ -1188,7 +1205,7 @@ sw_reg_strategy(struct swapdev *sdp, str * at the front of the nbp structure so that you can * cast pointers between the two structure easily. 
*/ - nbp = pool_get(&vndbuf_pool, PR_WAITOK); + getvndbuf(nbp); nbp->vb_buf.b_flags = bp->b_flags | B_CALL; nbp->vb_buf.b_bcount = sz; nbp->vb_buf.b_bufsize = sz; @@ -1233,7 +1250,7 @@ sw_reg_strategy(struct swapdev *sdp, str s = splbio(); if (vnx->vx_error != 0) { - pool_put(&vndbuf_pool, nbp); + putvndbuf(nbp); goto out; } vnx->vx_pending++; @@ -1262,7 +1279,7 @@ out: /* Arrive here at splbio */ bp->b_error = vnx->vx_error; bp->b_flags |= B_ERROR; } - pool_put(&vndxfer_pool, vnx); + putvndxfer(vnx); biodone(bp); } splx(s); @@ -1337,7 +1354,7 @@ sw_reg_iodone_internal(void *xvbp, void } /* kill vbp structure */ - pool_put(&vndbuf_pool, vbp); + putvndbuf(vbp); /* * wrap up this transaction if it has run to completion or, in @@ -1348,13 +1365,13 @@ sw_reg_iodone_internal(void *xvbp, void pbp->b_flags |= B_ERROR; pbp->b_error = vnx->vx_error; if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) { - pool_put(&vndxfer_pool, vnx); + putvndxfer(vnx); biodone(pbp); } } else if (pbp->b_resid == 0) { KASSERT(vnx->vx_pending == 0); if ((vnx->vx_flags & VX_BUSY) == 0) { - pool_put(&vndxfer_pool, vnx); + putvndxfer(vnx); biodone(pbp); } } @@ -1706,9 +1723,11 @@ uvm_swap_io(struct vm_page **pps, int st * now allocate a buf for the i/o. * [make sure we don't put the pagedaemon to sleep...] */ + s = splbio(); pflag = (async || curproc == uvm.pagedaemon_proc) ? PR_NOWAIT : PR_WAITOK; - bp = pool_get(&bufpool, pflag | PR_ZERO); + bp = pool_get(&bufpool, pflag); + splx(s); /* * if we failed to get a swapbuf, return "try again"
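
Note on the uvm_swap.c part above: the reinstated getvndxfer()/getvndbuf() macros raise splbio() around a PR_WAITOK pool_get(), while the matching put macros are only used from code that already runs at splbio() (the sw_reg_strategy() error path and the biodone callbacks). A minimal sketch of that asymmetry, not part of the diff; example_pool and struct example_xfer are hypothetical stand-ins for the vndxfer/vndbuf pools.

/*
 * Minimal sketch, not part of the diff: allocation raises splbio(),
 * the free side assumes the caller is already at splbio().  All
 * example_* names are hypothetical.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pool.h>

struct example_xfer {
	int	ex_pending;
};

extern struct pool example_pool;	/* assumed pool_init()ed elsewhere */

struct example_xfer *
example_getxfer(void)
{
	struct example_xfer *ex;
	int s;

	s = splbio();
	ex = pool_get(&example_pool, PR_WAITOK);
	splx(s);

	return (ex);
}

void
example_putxfer(struct example_xfer *ex)
{
	/* caller is expected to be at splbio() already */
	pool_put(&example_pool, ex);
}
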