Index: bpfdesc.h
===================================================================
RCS file: /cvs/src/sys/net/bpfdesc.h,v
retrieving revision 1.29
diff -u -p -r1.29 bpfdesc.h
--- bpfdesc.h	3 Dec 2015 16:27:32 -0000	1.29
+++ bpfdesc.h	20 Feb 2016 08:26:45 -0000
@@ -49,30 +49,38 @@
  */
 struct bpf_d {
 	SRPL_ENTRY(bpf_d) bd_next;	/* Linked list of descriptors */
+
+	struct mutex	bd_wr_lock;
+	struct rwlock	bd_rd_lock;
 	/*
-	 * Buffer slots: two mbuf clusters buffer the incoming packets.
+	 * Buffer slots for storing incoming packets
 	 *   The model has three slots. Sbuf is always occupied.
 	 *   sbuf (store) - Receive interrupt puts packets here.
 	 *   hbuf (hold) - When sbuf is full, put cluster here and
 	 *                 wakeup read (replace sbuf with fbuf).
 	 *   fbuf (free) - When read is done, put cluster here.
-	 * On receiving, if sbuf is full and fbuf is 0, packet is dropped.
+	 * On receiving, if sbuf is full and fbuf is NULL, packet is dropped.
	 */
-	caddr_t		bd_sbuf;	/* store slot */
-	caddr_t		bd_hbuf;	/* hold slot */
-	caddr_t		bd_fbuf;	/* free slot */
-	int		bd_slen;	/* current length of store buffer */
-	int		bd_hlen;	/* current length of hold buffer */
+	void *		bd_sbuf;	/* store slot */
+	void *		bd_fbuf;	/* free slot */
+	void *		bd_hbuf;	/* hold slot */
+
+	u_int		bd_slen;	/* current length of store buffer */
+	u_int		bd_hlen;	/* current length of hold buffer */
+
+	u_int		bd_hdrlen;
+	u_int		bd_bufsize;	/* absolute length of buffers */
+
+	int		bd_rtout;	/* Read timeout in 'ticks' */
+	int		bd_rdStart;	/* when the read started */
 
-	int		bd_bufsize;	/* absolute length of buffers */
+	u_int64_t	bd_dcount;	/* number of packets dropped */
+
+	u_long		bd_rcount;	/* number of packets received */
 
 	struct bpf_if *	bd_bif;		/* interface descriptor */
-	u_long		bd_rtout;	/* Read timeout in 'ticks' */
-	u_long		bd_rdStart;	/* when the read started */
 	struct srp	bd_rfilter;	/* read filter code */
 	struct srp	bd_wfilter;	/* write filter code */
-	u_long		bd_rcount;	/* number of packets received */
-	u_long		bd_dcount;	/* number of packets dropped */
 	u_char		bd_promisc;	/* true if listening promiscuously */
 	u_char		bd_state;	/* idle, waiting, or timed out */
Index: bpf.c
===================================================================
RCS file: /cvs/src/sys/net/bpf.c,v
retrieving revision 1.135
diff -u -p -r1.135 bpf.c
--- bpf.c	12 Feb 2016 18:56:12 -0000	1.135
+++ bpf.c	20 Feb 2016 08:26:45 -0000
@@ -103,7 +103,7 @@ int	bpf_setif(struct bpf_d *, struct ifr
 int	bpfpoll(dev_t, int, struct proc *);
 int	bpfkqfilter(dev_t, struct knote *);
 void	bpf_wakeup(struct bpf_d *);
-void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
+void	bpf_catchpacket(struct bpf_d *, const void *, size_t, size_t,
 	    void (*)(const void *, void *, size_t), struct timeval *);
 void	bpf_reset_d(struct bpf_d *);
 int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
@@ -269,6 +269,7 @@ bpf_attachd(struct bpf_d *d, struct bpf_
 	 * it will divert packets to bpf.
 	 */
+	d->bd_hdrlen = bp->bif_hdrlen;
 	d->bd_bif = bp;
 
 	KERNEL_ASSERT_LOCKED();
@@ -388,8 +389,9 @@ int
 bpfread(dev_t dev, struct uio *uio, int ioflag)
 {
 	struct bpf_d *d;
-	int error;
-	int s;
+	int error = 0;
+	void *buf = NULL;
+	int immediate;
 
 	d = bpfilter_lookup(minor(dev));
 	if (d->bd_bif == NULL)
@@ -402,10 +404,14 @@ bpfread(dev_t dev, struct uio *uio, int
 	if (uio->uio_resid != d->bd_bufsize)
 		return (EINVAL);
 
-	s = splnet();
-
 	D_GET(d);
 
+	error = rw_enter(&d->bd_rd_lock, RW_WRITE | RW_INTR);
+	if (error != 0) {
+		D_PUT(d);
+		return (error);
+	}
+
 	/*
 	 * If there's a timeout, bd_rdStart is tagged when we start the read.
 	 * we can then figure out when we're done reading.
@@ -415,29 +421,39 @@ bpfread(dev_t dev, struct uio *uio, int
 	else
 		d->bd_rdStart = 0;
 
+	immediate = d->bd_immediate;
+
 	/*
 	 * If the hold buffer is empty, then do a timed sleep, which
 	 * ends when the timeout expires or when enough packets
 	 * have arrived to fill the store buffer.
 	 */
-	while (d->bd_hbuf == 0) {
+	while (d->bd_hbuf == NULL) {
 		if (d->bd_bif == NULL) {
 			/* interface is gone */
-			if (d->bd_slen == 0) {
-				D_PUT(d);
-				splx(s);
-				return (EIO);
-			}
-			ROTATE_BUFFERS(d);
+
+			mtx_enter(&d->bd_wr_lock);
+			if (d->bd_slen == 0)
+				error = EIO;
+			else if (d->bd_fbuf != NULL)
+				ROTATE_BUFFERS(d);
+			mtx_leave(&d->bd_wr_lock);
+
+			if (error != 0)
+				goto exit;
+
 			break;
 		}
-		if (d->bd_immediate && d->bd_slen != 0) {
+		if (immediate) {
 			/*
 			 * A packet(s) either arrived since the previous
 			 * read or arrived while we were asleep.
 			 * Rotate the buffers and return what's here.
 			 */
-			ROTATE_BUFFERS(d);
+			mtx_enter(&d->bd_wr_lock);
+			if (d->bd_fbuf != NULL)
+				ROTATE_BUFFERS(d);
+			mtx_leave(&d->bd_wr_lock);
 			break;
 		}
 		if (d->bd_rtout == -1) {
@@ -450,55 +466,34 @@ bpfread(dev_t dev, struct uio *uio, int
 			} else
 				error = EWOULDBLOCK;
 		}
-		if (error == EINTR || error == ERESTART) {
-			D_PUT(d);
-			splx(s);
-			return (error);
-		}
-		if (error == EWOULDBLOCK) {
-			/*
-			 * On a timeout, return what's in the buffer,
-			 * which may be nothing.  If there is something
-			 * in the store buffer, we can rotate the buffers.
-			 */
-			if (d->bd_hbuf)
-				/*
-				 * We filled up the buffer in between
-				 * getting the timeout and arriving
-				 * here, so we don't need to rotate.
-				 */
-				break;
+		if (error == EINTR || error == ERESTART)
+			goto exit;
 
-			if (d->bd_slen == 0) {
-				D_PUT(d);
-				splx(s);
-				return (0);
-			}
-			ROTATE_BUFFERS(d);
-			break;
-		}
+		if (error == EWOULDBLOCK)
+			immediate = 1;
 	}
+
 	/*
 	 * At this point, we know we have something in the hold slot.
 	 */
-	splx(s);
+	KASSERT(d->bd_hbuf != NULL);
 
 	/*
 	 * Move data from hold buffer into user space.
 	 * We know the entire buffer is transferred since
 	 * we checked above that the read buffer is bpf_bufsize bytes.
 	 */
-	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);
-	s = splnet();
-	d->bd_fbuf = d->bd_hbuf;
+	buf = d->bd_hbuf;
 	d->bd_hbuf = NULL;
-	d->bd_hlen = 0;
+	error = uiomove(buf, d->bd_hlen, uio);
+	d->bd_fbuf = buf;
+
+exit:
+	rw_exit(&d->bd_rd_lock);
 	D_PUT(d);
-	splx(s);
-	return (error);
+
+	return (error);
 }
@@ -577,15 +572,20 @@ bpfwrite(dev_t dev, struct uio *uio, int
 void
 bpf_reset_d(struct bpf_d *d)
 {
+	mtx_enter(&d->bd_wr_lock);
+
 	if (d->bd_hbuf) {
 		/* Free the hold buffer. */
 		d->bd_fbuf = d->bd_hbuf;
 		d->bd_hbuf = NULL;
 	}
+
 	d->bd_slen = 0;
 	d->bd_hlen = 0;
 	d->bd_rcount = 0;
 	d->bd_dcount = 0;
+
+	mtx_leave(&d->bd_wr_lock);
 }
 
 /*
@@ -704,9 +704,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t
 	 * Flush read packet buffer.
 	 */
 	case BIOCFLUSH:
-		s = splnet();
 		bpf_reset_d(d);
-		splx(s);
 		break;
 
 	/*
@@ -926,7 +924,6 @@ bpf_setf(struct bpf_d *d, struct bpf_pro
 	struct srp *filter;
 	struct bpf_insn *fcode;
 	u_int flen, size;
-	int s;
 
 	KERNEL_ASSERT_LOCKED();
 	filter = wf ? &d->bd_wfilter : &d->bd_rfilter;
@@ -935,9 +932,7 @@ bpf_setf(struct bpf_d *d, struct bpf_pro
 		if (fp->bf_len != 0)
 			return (EINVAL);
 		srp_update_locked(&bpf_insn_gc, filter, NULL);
-		s = splnet();
 		bpf_reset_d(d);
-		splx(s);
 		return (0);
 	}
 	flen = fp->bf_len;
@@ -962,9 +957,7 @@ bpf_setf(struct bpf_d *d, struct bpf_pro
 
 	srp_update_locked(&bpf_insn_gc, filter, bf);
 
-	s = splnet();
 	bpf_reset_d(d);
-	splx(s);
 
 	return (0);
 }
@@ -1011,8 +1004,8 @@ bpf_setif(struct bpf_d *d, struct ifreq
 			bpf_attachd(d, candidate);
 		}
-		bpf_reset_d(d);
 		splx(s);
+		bpf_reset_d(d);
 		return (0);
 	}
 	/* Not found. */
@@ -1143,7 +1136,6 @@ bpf_tap(caddr_t arg, u_char *pkt, u_int
 	size_t slen;
 	struct timeval tv;
 	int drop = 0, gottime = 0;
-	int s;
 
 	if (bp == NULL)
 		return (0);
@@ -1168,17 +1160,10 @@ bpf_tap(caddr_t arg, u_char *pkt, u_int
 			if (!gottime++)
 				microtime(&tv);
 
-			KERNEL_LOCK();
-			s = splnet();
-			if (d->bd_bif != NULL) {
-				bpf_catchpacket(d, pkt, pktlen, slen,
-				    bcopy, &tv);
-			}
-			splx(s);
-			KERNEL_UNLOCK();
+			bpf_catchpacket(d, pkt, pktlen, slen, bcopy, &tv);
 
 			if (d->bd_fildrop)
-				drop++;
+				drop = 1;
 		}
 	}
 	SRPL_LEAVE(&i, d);
@@ -1224,7 +1209,6 @@ _bpf_mtap(caddr_t arg, struct mbuf *m, u
 	struct mbuf *m0;
 	struct timeval tv;
 	int gottime = 0;
-	int s;
 
 	if (m == NULL)
 		return;
@@ -1261,14 +1245,7 @@ _bpf_mtap(caddr_t arg, struct mbuf *m, u
 		if (!gottime++)
 			microtime(&tv);
 
-		KERNEL_LOCK();
-		s = splnet();
-		if (d->bd_bif != NULL) {
-			bpf_catchpacket(d, (u_char *)m, pktlen, slen,
-			    cpfn, &tv);
-		}
-		splx(s);
-		KERNEL_UNLOCK();
+		bpf_catchpacket(d, m, pktlen, slen, cpfn, &tv);
 
 		if (d->bd_fildrop)
 			m->m_flags |= M_FILDROP;
@@ -1380,12 +1357,13 @@ bpf_mtap_ether(caddr_t arg, struct mbuf
  * pkt is really an mbuf.
  */
 void
-bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
+bpf_catchpacket(struct bpf_d *d, const void *pkt, size_t pktlen, size_t snaplen,
     void (*cpfn)(const void *, void *, size_t), struct timeval *tv)
 {
 	struct bpf_hdr *hp;
 	int totlen, curlen;
-	int hdrlen = d->bd_bif->bif_hdrlen;
+	int hdrlen = d->bd_hdrlen;
+	int wake = 0;
 
 	/*
 	 * Figure out how many bytes to move.  If the packet is
@@ -1397,6 +1375,8 @@ bpf_catchpacket(struct bpf_d *d, u_char
 	if (totlen > d->bd_bufsize)
 		totlen = d->bd_bufsize;
 
+	mtx_enter(&d->bd_wr_lock);
+
 	/*
 	 * Round up the end of the previous packet to the next longword.
@@ -1412,26 +1392,30 @@ bpf_catchpacket(struct bpf_d *d, u_char
 			 * We haven't completed the previous read yet,
 			 * so drop the packet.
 			 */
-			++d->bd_dcount;
+			d->bd_dcount++;
+			mtx_leave(&d->bd_wr_lock);
 			return;
 		}
+
 		ROTATE_BUFFERS(d);
-		bpf_wakeup(d);
+		wake = 1;
 		curlen = 0;
 	}
 
 	/*
 	 * Append the bpf header.
 	 */
-	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
+	hp = (struct bpf_hdr *)((uint8_t *)d->bd_sbuf + curlen);
 	hp->bh_tstamp.tv_sec = tv->tv_sec;
 	hp->bh_tstamp.tv_usec = tv->tv_usec;
 	hp->bh_datalen = pktlen;
 	hp->bh_hdrlen = hdrlen;
+	hp->bh_caplen = totlen - hdrlen;
+
 	/*
 	 * Copy the packet data into the store buffer and update its length.
 	 */
-	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
+	(*cpfn)(pkt, (uint8_t *)hp + hdrlen, hp->bh_caplen);
 	d->bd_slen = curlen + totlen;
 
 	if (d->bd_immediate) {
@@ -1439,7 +1423,7 @@ bpf_catchpacket(struct bpf_d *d, u_char
 		/*
 		 * Immediate mode is set.  A packet arrived so any
 		 * reads should be woken up.
 		 */
-		bpf_wakeup(d);
+		wake = 1;
 	}
 	if (d->bd_rdStart && (d->bd_rtout + d->bd_rdStart < ticks)) {
@@ -1448,12 +1432,16 @@ bpf_catchpacket(struct bpf_d *d, u_char
 		 * may have timeouts set.  We got here by getting
 		 * a packet, so wake up the reader.
 		 */
-		if (d->bd_fbuf) {
+		if (d->bd_fbuf != NULL) {
 			d->bd_rdStart = 0;
 			ROTATE_BUFFERS(d);
-			bpf_wakeup(d);
+			wake = 1;
 		}
 	}
+	mtx_leave(&d->bd_wr_lock);
+
+	if (wake)
+		bpf_wakeup(d);
 }
 
 /*
@@ -1478,9 +1466,10 @@ bpf_freed(struct bpf_d *d)
 	if (--d->bd_ref > 0)
 		return;
 
-	free(d->bd_sbuf, M_DEVBUF, 0);
-	free(d->bd_hbuf, M_DEVBUF, 0);
-	free(d->bd_fbuf, M_DEVBUF, 0);
+	free(d->bd_sbuf, M_DEVBUF, d->bd_bufsize);
+	free(d->bd_hbuf, M_DEVBUF, d->bd_bufsize);
+	free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
+
 	KERNEL_ASSERT_LOCKED();
 	srp_update_locked(&bpf_insn_gc, &d->bd_rfilter, NULL);
 	srp_update_locked(&bpf_insn_gc, &d->bd_wfilter, NULL);
@@ -1637,6 +1626,8 @@ bpfilter_create(int unit)
 		return (NULL);
 	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) != NULL) {
 		bd->bd_unit = unit;
+		mtx_init(&bd->bd_wr_lock, IPL_BIO);
+		rw_init(&bd->bd_rd_lock, "bpfrd");
 		LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);
 	}
 	return (bd);
@@ -1702,8 +1693,8 @@ bpf_setdlt(struct bpf_d *d, u_int dlt)
 	s = splnet();
 	bpf_detachd(d);
 	bpf_attachd(d, bp);
-	bpf_reset_d(d);
 	splx(s);
+	bpf_reset_d(d);
 	return (0);
 }
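
For anyone reading along: the core trick in bpf_catchpacket() above is that the
wakeup is only flagged while bd_wr_lock is held and is issued after the mutex is
dropped, while bpfread() serializes readers on bd_rd_lock and only takes the
mutex around the buffer rotation.  Below is a small userland sketch of that
producer/consumer pattern, using pthreads instead of the kernel mutex/rwlock.
It is not part of the diff; the names (hold_ready, producer, consumer) are made
up for illustration only.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t wr_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t rd_cv = PTHREAD_COND_INITIALIZER;
static int hold_ready;			/* stands in for d->bd_hbuf != NULL */

/* producer side: like bpf_catchpacket() with its "wake" flag */
static void *
producer(void *arg)
{
	int wake = 0;

	(void)arg;

	pthread_mutex_lock(&wr_lock);
	/* ... copy the packet into the store buffer, rotate if needed ... */
	hold_ready = 1;
	wake = 1;			/* remember it, but don't wake up under the lock */
	pthread_mutex_unlock(&wr_lock);

	if (wake)
		pthread_cond_signal(&rd_cv);	/* the bpf_wakeup(d) analogue */
	return (NULL);
}

/* consumer side: like bpfread() waiting for the hold buffer */
static void
consumer(void)
{
	pthread_mutex_lock(&wr_lock);
	while (!hold_ready)
		pthread_cond_wait(&rd_cv, &wr_lock);
	hold_ready = 0;			/* "hold buffer" consumed */
	pthread_mutex_unlock(&wr_lock);
}

int
main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, producer, NULL);
	consumer();
	pthread_join(t, NULL);
	printf("reader woke up after the hold buffer filled\n");
	return (0);
}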