Index: uipc_mbuf.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v retrieving revision 1.193 diff -u -p -r1.193 uipc_mbuf.c --- uipc_mbuf.c 18 Aug 2014 04:06:16 -0000 1.193 +++ uipc_mbuf.c 17 Sep 2014 12:59:04 -0000 @@ -82,12 +82,11 @@ #include #include #include - #include #include +#include #include - #include #ifdef DDB @@ -96,6 +95,7 @@ #endif struct mbstat mbstat; /* mbuf stats */ +struct mutex mbstatmtx = MUTEX_INITIALIZER(IPL_NET); struct pool mbpool; /* mbuf pool */ struct pool mtagpool; @@ -112,6 +112,9 @@ u_int mclsizes[] = { static char mclnames[MCLPOOLS][8]; struct pool mclpools[MCLPOOLS]; +struct pool mextrefpl; +int m_extref(struct mbuf *, struct mbuf *, int); + struct pool *m_clpool(u_int); int max_linkhdr; /* largest link-level header */ @@ -136,11 +139,14 @@ mbinit(void) int i; #if DIAGNOSTIC + if (mclsizes[0] != MCLBYTES) + panic("mbinit: the smallest cluster size != MCLBYTES"); if (mclsizes[nitems(mclsizes) - 1] != MAXMCLBYTES) panic("mbinit: the largest cluster size != MAXMCLBYTES"); #endif pool_init(&mbpool, MSIZE, 0, 0, 0, "mbpl", NULL); + pool_setipl(&mbpool, IPL_NET); pool_set_constraints(&mbpool, &kp_dma_contig); pool_setlowat(&mbpool, mblowat); @@ -153,52 +159,24 @@ mbinit(void) mclsizes[i] >> 10); pool_init(&mclpools[i], mclsizes[i], 0, 0, 0, mclnames[i], NULL); + pool_setipl(&mclpools[i], IPL_NET); pool_set_constraints(&mclpools[i], &kp_dma_contig); pool_setlowat(&mclpools[i], mcllowat); } + pool_init(&mextrefpl, sizeof(u_int), + 0, 0, 0, "mextref", NULL); + pool_setipl(&mextrefpl, IPL_NET); + nmbclust_update(); } void nmbclust_update(void) { - int i; - /* - * Set the hard limit on the mclpools to the number of - * mbuf clusters the kernel is to support. Log the limit - * reached message max once a minute. - */ - for (i = 0; i < nitems(mclsizes); i++) { - (void)pool_sethardlimit(&mclpools[i], nmbclust, - mclpool_warnmsg, 60); - /* - * XXX this needs to be reconsidered. 
- * Setting the high water mark to nmbclust is too high - * but we need to have enough spare buffers around so that - * allocations in interrupt context don't fail or mclgeti() - * drivers may end up with empty rings. - */ - pool_sethiwat(&mclpools[i], nmbclust); - } pool_sethiwat(&mbpool, nmbclust); } -void -m_reclaim(void *arg, int flags) -{ - struct domain *dp; - struct protosw *pr; - int s = splnet(); - - for (dp = domains; dp; dp = dp->dom_next) - for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) - if (pr->pr_drain) - (*pr->pr_drain)(); - mbstat.m_drain++; - splx(s); -} - /* * Space allocation routines. */ @@ -206,20 +184,21 @@ struct mbuf * m_get(int nowait, int type) { struct mbuf *m; - int s; - s = splnet(); m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT); - if (m) - mbstat.m_mtypes[type]++; - splx(s); - if (m) { - m->m_type = type; - m->m_next = NULL; - m->m_nextpkt = NULL; - m->m_data = m->m_dat; - m->m_flags = 0; - } + if (m == NULL) + return (NULL); + + mtx_enter(&mbstatmtx); + mbstat.m_mtypes[type]++; + mtx_leave(&mbstatmtx); + + m->m_type = type; + m->m_next = NULL; + m->m_nextpkt = NULL; + m->m_data = m->m_dat; + m->m_flags = 0; + return (m); } @@ -231,25 +210,18 @@ struct mbuf * m_gethdr(int nowait, int type) { struct mbuf *m; - int s; - s = splnet(); m = pool_get(&mbpool, nowait == M_WAIT ? 
PR_WAITOK : PR_NOWAIT); - if (m) - mbstat.m_mtypes[type]++; - splx(s); - if (m) { - m->m_type = type; + if (m == NULL) + return (NULL); - /* keep in sync with m_inithdr */ - m->m_next = NULL; - m->m_nextpkt = NULL; - m->m_data = m->m_pktdat; - m->m_flags = M_PKTHDR; - memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr)); - m->m_pkthdr.pf.prio = IFQ_DEFPRIO; - } - return (m); + mtx_enter(&mbstatmtx); + mbstat.m_mtypes[type]++; + mtx_leave(&mbstatmtx); + + m->m_type = type; + + return (m_inithdr(m)); } struct mbuf * @@ -299,7 +271,6 @@ m_clget(struct mbuf *m, int how, struct struct mbuf *m0 = NULL; struct pool *pp; caddr_t buf; - int s; pp = m_clpool(pktlen); #ifdef DIAGNOSTIC @@ -307,23 +278,19 @@ m_clget(struct mbuf *m, int how, struct panic("m_clget: request for %u byte cluster", pktlen); #endif - s = splnet(); if (m == NULL) { - MGETHDR(m0, M_DONTWAIT, MT_DATA); - if (m0 == NULL) { - splx(s); + m0 = m_gethdr(how, MT_DATA); + if (m0 == NULL) return (NULL); - } + m = m0; } buf = pool_get(pp, how == M_WAIT ? 
PR_WAITOK : PR_NOWAIT); if (buf == NULL) { if (m0) m_freem(m0); - splx(s); return (NULL); } - splx(s); MEXTADD(m, buf, pp->pr_size, M_EXTWR, m_extfree_pool, pp); return (m); @@ -332,16 +299,18 @@ m_clget(struct mbuf *m, int how, struct void m_extfree_pool(caddr_t buf, u_int size, void *pp) { - splassert(IPL_NET); pool_put(pp, buf); } struct mbuf * -m_free_unlocked(struct mbuf *m) +m_free(struct mbuf *m) { struct mbuf *n; + mtx_enter(&mbstatmtx); mbstat.m_mtypes[m->m_type]--; + mtx_leave(&mbstatmtx); + n = m->m_next; if (m->m_flags & M_ZEROIZE) { m_zero(m); @@ -353,54 +322,67 @@ m_free_unlocked(struct mbuf *m) m_tag_delete_chain(m); if (m->m_flags & M_EXT) m_extfree(m); + pool_put(&mbpool, m); return (n); } -struct mbuf * -m_free(struct mbuf *m) +int +m_extref(struct mbuf *o, struct mbuf *n, int wait) { - struct mbuf *n; - int s; + u_int *ref; - s = splnet(); - n = m_free_unlocked(m); - splx(s); + if (o->m_ext.ext_ref == NULL) { + ref = pool_get(&mextrefpl, + wait == M_WAIT ? PR_WAITOK : PR_NOWAIT); + if (ref == NULL) + return (ENOBUFS); - return (n); + *ref = 2; + o->m_ext.ext_ref = ref; + } else + atomic_inc_int(o->m_ext.ext_ref); + + n->m_ext = o->m_ext; + n->m_flags |= o->m_flags & (M_EXT|M_EXTWR); + + return (0); +} + +static inline u_int +m_extunref(struct mbuf *m) +{ + u_int *ref = m->m_ext.ext_ref; + u_int count; + + if (ref == NULL) + return (0); + + m->m_ext.ext_ref = NULL; + count = atomic_dec_int_nv(ref); + if (count == 0) + pool_put(&mextrefpl, ref); + + return (count); } void m_extfree(struct mbuf *m) { - if (MCLISREFERENCED(m)) { - m->m_ext.ext_nextref->m_ext.ext_prevref = - m->m_ext.ext_prevref; - m->m_ext.ext_prevref->m_ext.ext_nextref = - m->m_ext.ext_nextref; - } else if (m->m_ext.ext_free) + if (m_extunref(m) == 0) { (*(m->m_ext.ext_free))(m->m_ext.ext_buf, m->m_ext.ext_size, m->m_ext.ext_arg); - else - panic("unknown type of extension buffer"); - m->m_ext.ext_size = 0; + } + m->m_flags &= ~(M_EXT|M_EXTWR); } void m_freem(struct mbuf *m) { - 
struct mbuf *n; - int s; - - if (m == NULL) - return; - s = splnet(); - do { - n = m_free_unlocked(m); - } while ((m = n) != NULL); - splx(s); + while (m != NULL) + m = m_free(m); } /* @@ -436,12 +418,8 @@ m_defrag(struct mbuf *m, int how) /* free chain behind and possible ext buf on the first mbuf */ m_freem(m->m_next); m->m_next = NULL; - - if (m->m_flags & M_EXT) { - int s = splnet(); + if (m->m_flags & M_EXT) m_extfree(m); - splx(s); - } /* * Bounce copy mbuf over to the original mbuf and set everything up. @@ -484,7 +462,7 @@ m_prepend(struct mbuf *m, int len, int h m->m_data -= len; m->m_len += len; } else { - MGET(mn, how, m->m_type); + mn = m_get(how, m->m_type); if (mn == NULL) { m_freem(m); return (NULL); @@ -498,6 +476,7 @@ m_prepend(struct mbuf *m, int len, int h } if (m->m_flags & M_PKTHDR) m->m_pkthdr.len += len; + return (m); } @@ -557,9 +536,9 @@ m_copym0(struct mbuf *m0, int off, int l n->m_len = min(len, m->m_len - off); if (m->m_flags & M_EXT) { if (!deep) { + if (m_extref(m, n, wait) != 0) + goto nospace; n->m_data = m->m_data + off; - n->m_ext = m->m_ext; - MCLADDREFERENCE(m, n); } else { /* * we are unsure about the way m was allocated. 
@@ -832,7 +811,7 @@ m_pullup(struct mbuf *n, int len) } else { if (len > MAXMCLBYTES) goto bad; - MGET(m, M_DONTWAIT, n->m_type); + m = m_get(M_DONTWAIT, n->m_type); if (m == NULL) goto bad; if (len > MHLEN) { @@ -939,7 +918,7 @@ m_inject(struct mbuf *m0, int len0, int return (NULL); } - MGET(n, wait, MT_DATA); + n = m_get(wait, MT_DATA); if (n == NULL) { if (n2) m_freem(n2); @@ -1009,15 +988,19 @@ m_split(struct mbuf *m0, int len0, int w m->m_next = NULL; return (n); } else { - MGET(n, wait, m->m_type); + n = m_get(wait, m->m_type); if (n == NULL) return (NULL); M_ALIGN(n, remain); } extpacket: if (m->m_flags & M_EXT) { - n->m_ext = m->m_ext; - MCLADDREFERENCE(m, n); + if (m_extref(m, n, wait) != 0) { + if (ISSET(m0->m_flags, M_PKTHDR)) + m0->m_pkthdr.len = olen; + m_freem(n); + return (NULL); + } n->m_data = m->m_data + len; } else { bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); @@ -1239,9 +1222,8 @@ m_print(void *v, m->m_ext.ext_buf, m->m_ext.ext_size); (*pr)("m_ext.ext_free: %p\tm_ext.ext_arg: %p\n", m->m_ext.ext_free, m->m_ext.ext_arg); - (*pr)("m_ext.ext_nextref: %p\tm_ext.ext_prevref: %p\n", - m->m_ext.ext_nextref, m->m_ext.ext_prevref); - + (*pr)("m_ext.ext_ref: %p\tcount: %u\n", m->m_ext.ext_ref, + m->m_ext.ext_ref == NULL ? 1 : *m->m_ext.ext_ref); } } #endif