Index: sys/mbuf.h =================================================================== RCS file: /cvs/src/sys/sys/mbuf.h,v retrieving revision 1.182 diff -u -p -r1.182 mbuf.h --- sys/mbuf.h 18 Aug 2014 05:11:03 -0000 1.182 +++ sys/mbuf.h 22 Sep 2014 01:59:28 -0000 @@ -122,19 +122,12 @@ struct pkthdr { /* description of external storage mapped into mbuf, valid if M_EXT set */ struct mbuf_ext { + u_int *ext_ref; caddr_t ext_buf; /* start of buffer */ /* free routine if not the usual */ void (*ext_free)(caddr_t, u_int, void *); void *ext_arg; u_int ext_size; /* size of buffer, for ext_free */ - struct mbuf *ext_nextref; - struct mbuf *ext_prevref; -#ifdef DEBUG - const char *ext_ofile; - const char *ext_nfile; - int ext_oline; - int ext_nline; -#endif }; struct mbuf { @@ -245,41 +238,13 @@ struct mbuf { /* * Macros for tracking external storage associated with an mbuf. - * - * Note: add and delete reference must be called at splnet(). */ -#ifdef DEBUG -#define MCLREFDEBUGN(m, file, line) do { \ - (m)->m_ext.ext_nfile = (file); \ - (m)->m_ext.ext_nline = (line); \ - } while (/* CONSTCOND */ 0) -#define MCLREFDEBUGO(m, file, line) do { \ - (m)->m_ext.ext_ofile = (file); \ - (m)->m_ext.ext_oline = (line); \ - } while (/* CONSTCOND */ 0) -#else -#define MCLREFDEBUGN(m, file, line) -#define MCLREFDEBUGO(m, file, line) -#endif - -#define MCLISREFERENCED(m) ((m)->m_ext.ext_nextref != (m)) -#define MCLADDREFERENCE(o, n) do { \ - int ms = splnet(); \ - (n)->m_flags |= ((o)->m_flags & (M_EXT|M_EXTWR)); \ - (n)->m_ext.ext_nextref = (o)->m_ext.ext_nextref; \ - (n)->m_ext.ext_prevref = (o); \ - (o)->m_ext.ext_nextref = (n); \ - (n)->m_ext.ext_nextref->m_ext.ext_prevref = (n); \ - splx(ms); \ - MCLREFDEBUGN((n), __FILE__, __LINE__); \ - } while (/* CONSTCOND */ 0) +#define MCLISREFERENCED(m) \ + ((m)->m_ext.ext_ref != NULL && *(m)->m_ext.ext_ref > 1) #define MCLINITREFERENCE(m) do { \ - (m)->m_ext.ext_prevref = (m); \ - (m)->m_ext.ext_nextref = (m); \ - MCLREFDEBUGO((m), __FILE__, __LINE__); \ - MCLREFDEBUGN((m), NULL, 0); \ + (m)->m_ext.ext_ref = NULL; \ } while (/* CONSTCOND */ 0) /* @@ -411,7 +376,6 @@ void mbinit(void); struct mbuf *m_copym2(struct mbuf *, int, int, int); struct mbuf *m_copym(struct mbuf *, int, int, int); struct mbuf *m_free(struct mbuf *); -struct mbuf *m_free_unlocked(struct mbuf *); struct mbuf *m_get(int, int); struct mbuf *m_getclr(int, int); struct mbuf *m_gethdr(int, int); Index: kern/uipc_mbuf.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v retrieving revision 1.194 diff -u -p -r1.194 uipc_mbuf.c --- kern/uipc_mbuf.c 14 Sep 2014 14:17:26 -0000 1.194 +++ kern/uipc_mbuf.c 22 Sep 2014 01:59:28 -0000 @@ -81,12 +81,11 @@ #include #include #include - #include #include +#include #include - #include #ifdef DDB @@ -95,6 +94,7 @@ #endif struct mbstat mbstat; /* mbuf stats */ +struct mutex mbstatmtx = MUTEX_INITIALIZER(IPL_NET); struct pool mbpool; /* mbuf pool */ struct pool mtagpool; @@ -111,6 +111,9 @@ u_int mclsizes[] = { static char mclnames[MCLPOOLS][8]; struct pool mclpools[MCLPOOLS]; +struct pool mextrefpl; +int m_extref(struct mbuf *, struct mbuf *, int); + struct pool *m_clpool(u_int); int max_linkhdr; /* largest link-level header */ @@ -135,11 +138,14 @@ mbinit(void) int i; #if DIAGNOSTIC + if (mclsizes[0] != MCLBYTES) + panic("mbinit: the smalles cluster size != MCLBYTES"); if (mclsizes[nitems(mclsizes) - 1] != MAXMCLBYTES) panic("mbinit: the largest cluster size != MAXMCLBYTES"); #endif pool_init(&mbpool, MSIZE, 0, 0, 0, "mbpl", NULL); + pool_setipl(&mbpool, IPL_NET); pool_set_constraints(&mbpool, &kp_dma_contig); pool_setlowat(&mbpool, mblowat); @@ -152,52 +158,24 @@ mbinit(void) mclsizes[i] >> 10); pool_init(&mclpools[i], mclsizes[i], 0, 0, 0, mclnames[i], NULL); + pool_setipl(&mclpools[i], IPL_NET); pool_set_constraints(&mclpools[i], &kp_dma_contig); pool_setlowat(&mclpools[i], mcllowat); } + pool_init(&mextrefpl, sizeof(u_int), + 0, 0, 0, "mextref", NULL); + pool_setipl(&mextrefpl, IPL_NET); + nmbclust_update(); } void nmbclust_update(void) { - int i; - /* - * Set the hard limit on the mclpools to the number of - * mbuf clusters the kernel is to support. Log the limit - * reached message max once a minute. - */ - for (i = 0; i < nitems(mclsizes); i++) { - (void)pool_sethardlimit(&mclpools[i], nmbclust, - mclpool_warnmsg, 60); - /* - * XXX this needs to be reconsidered. - * Setting the high water mark to nmbclust is too high - * but we need to have enough spare buffers around so that - * allocations in interrupt context don't fail or mclgeti() - * drivers may end up with empty rings. - */ - pool_sethiwat(&mclpools[i], nmbclust); - } pool_sethiwat(&mbpool, nmbclust); } -void -m_reclaim(void *arg, int flags) -{ - struct domain *dp; - struct protosw *pr; - int s = splnet(); - - for (dp = domains; dp; dp = dp->dom_next) - for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) - if (pr->pr_drain) - (*pr->pr_drain)(); - mbstat.m_drain++; - splx(s); -} - /* * Space allocation routines. */ @@ -205,20 +183,21 @@ struct mbuf * m_get(int nowait, int type) { struct mbuf *m; - int s; - s = splnet(); m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT); - if (m) - mbstat.m_mtypes[type]++; - splx(s); - if (m) { - m->m_type = type; - m->m_next = NULL; - m->m_nextpkt = NULL; - m->m_data = m->m_dat; - m->m_flags = 0; - } + if (m == NULL) + return (NULL); + + mtx_enter(&mbstatmtx); + mbstat.m_mtypes[type]++; + mtx_leave(&mbstatmtx); + + m->m_type = type; + m->m_next = NULL; + m->m_nextpkt = NULL; + m->m_data = m->m_dat; + m->m_flags = 0; + return (m); } @@ -230,25 +209,18 @@ struct mbuf * m_gethdr(int nowait, int type) { struct mbuf *m; - int s; - s = splnet(); m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT); - if (m) - mbstat.m_mtypes[type]++; - splx(s); - if (m) { - m->m_type = type; + if (m == NULL) + return (NULL); - /* keep in sync with m_inithdr */ - m->m_next = NULL; - m->m_nextpkt = NULL; - m->m_data = m->m_pktdat; - m->m_flags = M_PKTHDR; - memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr)); - m->m_pkthdr.pf.prio = IFQ_DEFPRIO; - } - return (m); + mtx_enter(&mbstatmtx); + mbstat.m_mtypes[type]++; + mtx_leave(&mbstatmtx); + + m->m_type = type; + + return (m_inithdr(m)); } struct mbuf * @@ -298,7 +270,6 @@ m_clget(struct mbuf *m, int how, struct struct mbuf *m0 = NULL; struct pool *pp; caddr_t buf; - int s; pp = m_clpool(pktlen); #ifdef DIAGNOSTIC @@ -306,23 +277,19 @@ m_clget(struct mbuf *m, int how, struct panic("m_clget: request for %u byte cluster", pktlen); #endif - s = splnet(); if (m == NULL) { - MGETHDR(m0, M_DONTWAIT, MT_DATA); - if (m0 == NULL) { - splx(s); + m0 = m_gethdr(how, MT_DATA); + if (m0 == NULL) return (NULL); - } + m = m0; } buf = pool_get(pp, how == M_WAIT ? PR_WAITOK : PR_NOWAIT); if (buf == NULL) { if (m0) m_freem(m0); - splx(s); return (NULL); } - splx(s); MEXTADD(m, buf, pp->pr_size, M_EXTWR, m_extfree_pool, pp); return (m); @@ -331,16 +298,18 @@ m_clget(struct mbuf *m, int how, struct void m_extfree_pool(caddr_t buf, u_int size, void *pp) { - splassert(IPL_NET); pool_put(pp, buf); } struct mbuf * -m_free_unlocked(struct mbuf *m) +m_free(struct mbuf *m) { struct mbuf *n; + mtx_enter(&mbstatmtx); mbstat.m_mtypes[m->m_type]--; + mtx_leave(&mbstatmtx); + n = m->m_next; if (m->m_flags & M_ZEROIZE) { m_zero(m); @@ -352,54 +321,67 @@ m_free_unlocked(struct mbuf *m) m_tag_delete_chain(m); if (m->m_flags & M_EXT) m_extfree(m); + pool_put(&mbpool, m); return (n); } -struct mbuf * -m_free(struct mbuf *m) +int +m_extref(struct mbuf *o, struct mbuf *n, int wait) { - struct mbuf *n; - int s; + u_int *ref; - s = splnet(); - n = m_free_unlocked(m); - splx(s); + if (o->m_ext.ext_ref == NULL) { + ref = pool_get(&mextrefpl, + wait == M_WAIT ? PR_WAITOK : PR_NOWAIT); + if (ref == NULL) + return (ENOBUFS); - return (n); + *ref = 2; + o->m_ext.ext_ref = ref; + } else + atomic_inc_int(o->m_ext.ext_ref); + + n->m_ext = o->m_ext; + n->m_flags |= o->m_flags & (M_EXT|M_EXTWR); + + return (0); +} + +static inline u_int +m_extunref(struct mbuf *m) +{ + u_int *ref = m->m_ext.ext_ref; + u_int count; + + if (ref == NULL) + return (0); + + m->m_ext.ext_ref = NULL; + count = atomic_dec_int_nv(ref); + if (count == 0) + pool_put(&mextrefpl, ref); + + return (count); } void m_extfree(struct mbuf *m) { - if (MCLISREFERENCED(m)) { - m->m_ext.ext_nextref->m_ext.ext_prevref = - m->m_ext.ext_prevref; - m->m_ext.ext_prevref->m_ext.ext_nextref = - m->m_ext.ext_nextref; - } else if (m->m_ext.ext_free) + if (m_extunref(m) == 0) { (*(m->m_ext.ext_free))(m->m_ext.ext_buf, m->m_ext.ext_size, m->m_ext.ext_arg); - else - panic("unknown type of extension buffer"); - m->m_ext.ext_size = 0; + } + m->m_flags &= ~(M_EXT|M_EXTWR); } void m_freem(struct mbuf *m) { - struct mbuf *n; - int s; - - if (m == NULL) - return; - s = splnet(); - do { - n = m_free_unlocked(m); - } while ((m = n) != NULL); - splx(s); + while (m != NULL) + m = m_free(m); } /* @@ -435,12 +417,8 @@ m_defrag(struct mbuf *m, int how) /* free chain behind and possible ext buf on the first mbuf */ m_freem(m->m_next); m->m_next = NULL; - - if (m->m_flags & M_EXT) { - int s = splnet(); + if (m->m_flags & M_EXT) m_extfree(m); - splx(s); - } /* * Bounce copy mbuf over to the original mbuf and set everything up. @@ -483,7 +461,7 @@ m_prepend(struct mbuf *m, int len, int h m->m_data -= len; m->m_len += len; } else { - MGET(mn, how, m->m_type); + mn = m_get(how, m->m_type); if (mn == NULL) { m_freem(m); return (NULL); @@ -497,6 +475,7 @@ m_prepend(struct mbuf *m, int len, int h } if (m->m_flags & M_PKTHDR) m->m_pkthdr.len += len; + return (m); } @@ -556,9 +535,9 @@ m_copym0(struct mbuf *m0, int off, int l n->m_len = min(len, m->m_len - off); if (m->m_flags & M_EXT) { if (!deep) { + if (m_extref(m, n, wait) != 0) + goto nospace; n->m_data = m->m_data + off; - n->m_ext = m->m_ext; - MCLADDREFERENCE(m, n); } else { /* * we are unsure about the way m was allocated. @@ -831,7 +810,7 @@ m_pullup(struct mbuf *n, int len) } else { if (len > MAXMCLBYTES) goto bad; - MGET(m, M_DONTWAIT, n->m_type); + m = m_get(M_DONTWAIT, n->m_type); if (m == NULL) goto bad; if (len > MHLEN) { @@ -938,7 +917,7 @@ m_inject(struct mbuf *m0, int len0, int return (NULL); } - MGET(n, wait, MT_DATA); + n = m_get(wait, MT_DATA); if (n == NULL) { if (n2) m_freem(n2); @@ -1008,15 +987,19 @@ m_split(struct mbuf *m0, int len0, int w m->m_next = NULL; return (n); } else { - MGET(n, wait, m->m_type); + n = m_get(wait, m->m_type); if (n == NULL) return (NULL); M_ALIGN(n, remain); } extpacket: if (m->m_flags & M_EXT) { - n->m_ext = m->m_ext; - MCLADDREFERENCE(m, n); + if (m_extref(m, n, wait) != 0) { + if (ISSET(m0->m_flags, M_PKTHDR)) + m0->m_pkthdr.len = olen; + m_freem(n); + return (NULL); + } n->m_data = m->m_data + len; } else { bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); @@ -1238,9 +1221,8 @@ m_print(void *v, m->m_ext.ext_buf, m->m_ext.ext_size); (*pr)("m_ext.ext_free: %p\tm_ext.ext_arg: %p\n", m->m_ext.ext_free, m->m_ext.ext_arg); - (*pr)("m_ext.ext_nextref: %p\tm_ext.ext_prevref: %p\n", - m->m_ext.ext_nextref, m->m_ext.ext_prevref); - + (*pr)("m_ext.ext_ref: %p\tcount: %u\n", m->m_ext.ext_ref, + m->m_ext.ext_ref == NULL ? 1 : *m->m_ext.ext_ref); } } #endif