? arch/amd64/compile/DEBUG.MP ? arch/amd64/conf/DEBUG.MP Index: arch/amd64/amd64/bus_dma.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/bus_dma.c,v retrieving revision 1.48 diff -u -p -r1.48 bus_dma.c --- arch/amd64/amd64/bus_dma.c 27 Jan 2015 05:10:30 -0000 1.48 +++ arch/amd64/amd64/bus_dma.c 20 Feb 2015 05:24:40 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: bus_dma.c,v 1.48 2015/01/27 05:10:30 dlg Exp $ */ +/* $OpenBSD: bus_dma.c,v 1.47 2015/01/24 15:13:55 kettenis Exp $ */ /* $NetBSD: bus_dma.c,v 1.3 2003/05/07 21:33:58 fvdl Exp $ */ /*- @@ -97,8 +97,8 @@ #include -int _bus_dmamap_load_buffer(bus_dma_tag_t, bus_dmamap_t, void *, bus_size_t, - struct proc *, int, paddr_t *, int *, int); +int _bus_dmamap_load_paddr(bus_dma_tag_t, bus_dmamap_t, bus_addr_t, bus_size_t); +int _bus_dmamap_load_vaddr(bus_dma_tag_t, bus_dmamap_t, void *, size_t, pmap_t); /* * Common function for DMA map creation. May be called by bus-specific @@ -161,8 +161,7 @@ int _bus_dmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf, bus_size_t buflen, struct proc *p, int flags) { - bus_addr_t lastaddr = 0; - int seg, error; + int error; /* * Make sure that on error condition we return "no valid mappings". @@ -173,14 +172,17 @@ _bus_dmamap_load(bus_dma_tag_t t, bus_dm if (buflen > map->_dm_size) return (EINVAL); - seg = 0; - error = _bus_dmamap_load_buffer(t, map, buf, buflen, p, flags, - &lastaddr, &seg, 1); - if (error == 0) { - map->dm_mapsize = buflen; - map->dm_nsegs = seg + 1; + error = _bus_dmamap_load_vaddr(t, map, buf, buflen, + p == NULL ? 
pmap_kernel() : p->p_vmspace->vm_map.pmap); + if (error != 0) { + map->dm_mapsize = 0; + map->dm_nsegs = 0; + return (error); } - return (error); + + map->dm_nsegs++; + + return (0); } /* @@ -190,9 +192,8 @@ int _bus_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t map, struct mbuf *m0, int flags) { - paddr_t lastaddr = 0; - int seg, error, first; struct mbuf *m; + int error; /* * Make sure that on error condition we return "no valid mappings". @@ -208,21 +209,22 @@ _bus_dmamap_load_mbuf(bus_dma_tag_t t, b if (m0->m_pkthdr.len > map->_dm_size) return (EINVAL); - first = 1; - seg = 0; - error = 0; - for (m = m0; m != NULL && error == 0; m = m->m_next) { + for (m = m0; m != NULL; m = m->m_next) { if (m->m_len == 0) continue; - error = _bus_dmamap_load_buffer(t, map, m->m_data, m->m_len, - NULL, flags, &lastaddr, &seg, first); - first = 0; - } - if (error == 0) { - map->dm_mapsize = m0->m_pkthdr.len; - map->dm_nsegs = seg + 1; + + error = _bus_dmamap_load_vaddr(t, map, m->m_data, m->m_len, + pmap_kernel()); + if (error != 0) { + map->dm_mapsize = 0; + map->dm_nsegs = 0; + return (error); + } } - return (error); + + map->dm_nsegs++; + + return (0); } /* @@ -232,12 +234,11 @@ int _bus_dmamap_load_uio(bus_dma_tag_t t, bus_dmamap_t map, struct uio *uio, int flags) { - paddr_t lastaddr = 0; - int seg, i, error, first; - bus_size_t minlen, resid; - struct proc *p = NULL; - struct iovec *iov; - caddr_t addr; + int i, error; + pmap_t pmap = pmap_kernel(); + struct iovec * const iov = uio->uio_iov; + bus_size_t resid = uio->uio_resid; + bus_size_t len; /* * Make sure that on error condition we return "no valid mappings". 
@@ -245,38 +246,30 @@ _bus_dmamap_load_uio(bus_dma_tag_t t, bu map->dm_mapsize = 0; map->dm_nsegs = 0; - resid = uio->uio_resid; - iov = uio->uio_iov; + if (resid > map->_dm_size) + return (EINVAL); - if (uio->uio_segflg == UIO_USERSPACE) { - p = uio->uio_procp; -#ifdef DIAGNOSTIC - if (p == NULL) - panic("_bus_dmamap_load_uio: USERSPACE but no proc"); -#endif - } + if (uio->uio_segflg == UIO_USERSPACE) + pmap = uio->uio_procp->p_vmspace->vm_map.pmap; - first = 1; - seg = 0; - error = 0; - for (i = 0; i < uio->uio_iovcnt && resid != 0 && error == 0; i++) { - /* - * Now at the first iovec to load. Load each iovec - * until we have exhausted the residual count. - */ - minlen = resid < iov[i].iov_len ? resid : iov[i].iov_len; - addr = (caddr_t)iov[i].iov_base; - - error = _bus_dmamap_load_buffer(t, map, addr, minlen, - p, flags, &lastaddr, &seg, first); - first = 0; - - resid -= minlen; - } - if (error == 0) { - map->dm_mapsize = uio->uio_resid; - map->dm_nsegs = seg + 1; + for (i = 0; i < uio->uio_iovcnt; i++) { + len = MIN(resid, iov[i].iov_len); + error = _bus_dmamap_load_vaddr(t, map, + iov[i].iov_base, len, pmap); + if (error != 0) { + map->dm_mapsize = 0; + map->dm_nsegs = 0; + return (error); + } + + resid -= len; + if (resid == 0) + break; } + /* check resid for leftovers? */ + + map->dm_nsegs++; + return (error); } @@ -288,10 +281,10 @@ int _bus_dmamap_load_raw(bus_dma_tag_t t, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags) { - bus_addr_t paddr, baddr, bmask, lastaddr = 0; - bus_size_t plen, sgsize, mapsize; - int first = 1; - int i, seg = 0; + bus_addr_t paddr; + bus_size_t plen; + int i; + int error; /* * Make sure that on error condition we return "no valid mappings". 
@@ -302,68 +295,24 @@ _bus_dmamap_load_raw(bus_dma_tag_t t, bu if (nsegs > map->_dm_segcnt || size > map->_dm_size) return (EINVAL); - mapsize = size; - bmask = ~(map->_dm_boundary - 1); - - for (i = 0; i < nsegs && size > 0; i++) { + for (i = 0; i < nsegs; i++) { paddr = segs[i].ds_addr; plen = MIN(segs[i].ds_len, size); - while (plen > 0) { - /* - * Compute the segment size, and adjust counts. - */ - sgsize = PAGE_SIZE - ((u_long)paddr & PGOFSET); - if (plen < sgsize) - sgsize = plen; - - if (paddr > dma_constraint.ucr_high) - panic("Non dma-reachable buffer at paddr %#lx(raw)", - paddr); - - /* - * Make sure we don't cross any boundaries. - */ - if (map->_dm_boundary > 0) { - baddr = (paddr + map->_dm_boundary) & bmask; - if (sgsize > (baddr - paddr)) - sgsize = (baddr - paddr); - } - - /* - * Insert chunk into a segment, coalescing with - * previous segment if possible. - */ - if (first) { - map->dm_segs[seg].ds_addr = paddr; - map->dm_segs[seg].ds_len = sgsize; - first = 0; - } else { - if (paddr == lastaddr && - (map->dm_segs[seg].ds_len + sgsize) <= - map->_dm_maxsegsz && - (map->_dm_boundary == 0 || - (map->dm_segs[seg].ds_addr & bmask) == - (paddr & bmask))) - map->dm_segs[seg].ds_len += sgsize; - else { - if (++seg >= map->_dm_segcnt) - return (EINVAL); - map->dm_segs[seg].ds_addr = paddr; - map->dm_segs[seg].ds_len = sgsize; - } - } - - paddr += sgsize; - plen -= sgsize; - size -= sgsize; - - lastaddr = paddr; + error = _bus_dmamap_load_paddr(t, map, paddr, plen); + if (error != 0) { + map->dm_mapsize = 0; + map->dm_nsegs = 0; + return (error); } + + size -= plen; + if (size == 0) + break; } - map->dm_mapsize = mapsize; - map->dm_nsegs = seg + 1; + map->dm_nsegs++; + return (0); } @@ -550,95 +499,100 @@ _bus_dmamem_mmap(bus_dma_tag_t t, bus_dm /********************************************************************** * DMA utility functions **********************************************************************/ -/* - * Utility function to load a linear 
buffer. lastaddrp holds state - * between invocations (for multiple-buffer loads). segp contains - * the starting segment on entrance, and the ending segment on exit. - * first indicates if this is the first invocation of this function. - */ + int -_bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf, - bus_size_t buflen, struct proc *p, int flags, paddr_t *lastaddrp, int *segp, - int first) -{ - bus_size_t sgsize; - bus_addr_t curaddr, lastaddr, baddr, bmask; - vaddr_t vaddr = (vaddr_t)buf; - int seg; - pmap_t pmap; - - if (p != NULL) - pmap = p->p_vmspace->vm_map.pmap; - else - pmap = pmap_kernel(); - - lastaddr = *lastaddrp; - bmask = ~(map->_dm_boundary - 1); - - for (seg = *segp; buflen > 0 ; ) { - /* - * Get the physical address for this segment. - */ - pmap_extract(pmap, vaddr, (paddr_t *)&curaddr); - - if (curaddr > dma_constraint.ucr_high) - panic("Non dma-reachable buffer at curaddr %#lx(raw)", - curaddr); - - /* - * Compute the segment size, and adjust counts. - */ - sgsize = PAGE_SIZE - ((u_long)vaddr & PGOFSET); - if (buflen < sgsize) - sgsize = buflen; - - /* - * Make sure we don't cross any boundaries. - */ +_bus_dmamap_load_paddr(bus_dma_tag_t t, bus_dmamap_t map, + bus_addr_t dvaddr, bus_size_t size) +{ + bus_dma_segment_t * const segs = map->dm_segs; + bus_addr_t bmask = ~(map->_dm_boundary - 1); + + int nseg = map->dm_nsegs; + bus_size_t len = size; + + if (dvaddr > dma_constraint.ucr_high) + panic("Non dma-reachable buffer at %#lx(raw)", dvaddr); + + if (map->dm_mapsize == 0) + ; /* First segment */ + else if (segs[nseg].ds_addr + segs[nseg].ds_len == dvaddr) { + /* Coalesce */ + dvaddr = segs[nseg].ds_addr; + len += segs[nseg].ds_len; + } else { + /* Next segment */ + if (++nseg >= map->_dm_segcnt) + return (EFBIG); + } + + for (;;) { + bus_size_t sgsize = len; + + /* Make sure we don't cross any boundaries. 
*/ if (map->_dm_boundary > 0) { - baddr = (curaddr + map->_dm_boundary) & bmask; - if (sgsize > (baddr - curaddr)) - sgsize = (baddr - curaddr); - } + bus_addr_t baddr; /* next boundary address */ - /* - * Insert chunk into a segment, coalescing with - * previous segment if possible. - */ - if (first) { - map->dm_segs[seg].ds_addr = curaddr; - map->dm_segs[seg].ds_len = sgsize; - first = 0; - } else { - if (curaddr == lastaddr && - (map->dm_segs[seg].ds_len + sgsize) <= - map->_dm_maxsegsz && - (map->_dm_boundary == 0 || - (map->dm_segs[seg].ds_addr & bmask) == - (curaddr & bmask))) - map->dm_segs[seg].ds_len += sgsize; - else { - if (++seg >= map->_dm_segcnt) - break; - map->dm_segs[seg].ds_addr = curaddr; - map->dm_segs[seg].ds_len = sgsize; - } + baddr = (dvaddr + map->_dm_boundary) & bmask; + if (sgsize > (baddr - dvaddr)) + sgsize = (baddr - dvaddr); } - lastaddr = curaddr + sgsize; - vaddr += sgsize; - buflen -= sgsize; + /* Make sure we dont exceed the max segment size. */ + if (sgsize > map->_dm_maxsegsz) + sgsize = map->_dm_maxsegsz; + + /* Insert chunk into a segment. */ + segs[nseg].ds_addr = dvaddr; + segs[nseg].ds_len = sgsize; + + len -= sgsize; + if (len == 0) + break; + + if (++nseg >= map->_dm_segcnt) + return (EFBIG); + + dvaddr += sgsize; } - *segp = seg; - *lastaddrp = lastaddr; + map->dm_mapsize += size; + map->dm_nsegs = nseg; - /* - * Did we fit? - */ - if (buflen != 0) - return (EFBIG); /* XXX better return value here? 
*/ return (0); +} + +int +_bus_dmamap_load_vaddr(bus_dma_tag_t t, bus_dmamap_t map, + void *buf, size_t size, pmap_t pmap) +{ + vaddr_t vaddr; + paddr_t paddr; + vaddr_t next, end; + int error; + + vaddr = (vaddr_t)buf; + end = vaddr + size; + + if (pmap == pmap_kernel() && + vaddr >= PMAP_DIRECT_BASE && end <= PMAP_DIRECT_END) + paddr = vaddr - PMAP_DIRECT_BASE; + else { + for (next = (vaddr + PAGE_SIZE) & ~PAGE_MASK; + next < end; next += PAGE_SIZE) { + pmap_extract(pmap, vaddr, &paddr); + error = _bus_dmamap_load_paddr(t, map, + paddr, next - vaddr); + if (error != 0) + return (error); + + vaddr = next; + } + + pmap_extract(pmap, vaddr, &paddr); + size = end - vaddr; + } + + return (_bus_dmamap_load_paddr(t, map, paddr, size)); } /* Index: arch/amd64/amd64/pmap.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/pmap.c,v retrieving revision 1.88 diff -u -p -r1.88 pmap.c --- arch/amd64/amd64/pmap.c 7 Feb 2015 01:46:27 -0000 1.88 +++ arch/amd64/amd64/pmap.c 20 Feb 2015 05:24:40 -0000 @@ -385,6 +385,9 @@ pmap_map_ptes(struct pmap *pmap, pt_entr lcr3(pmap->pm_pdirpa); } + if (pmap != pmap_kernel()) + mtx_enter(&pmap->pm_mtx); + *ptepp = PTE_BASE; *pdeppp = normal_pdes; return; @@ -393,6 +396,9 @@ pmap_map_ptes(struct pmap *pmap, pt_entr void pmap_unmap_ptes(struct pmap *pmap, paddr_t save_cr3) { + if (pmap != pmap_kernel()) + mtx_leave(&pmap->pm_mtx); + if (save_cr3 != 0) { x86_atomic_clearbits_u64(&pmap->pm_cpus, (1ULL << cpu_number())); lcr3(save_cr3); @@ -680,6 +686,7 @@ pmap_bootstrap(paddr_t first_avail, padd pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, 0, 0, "pvpl", &pool_allocator_nointr); pool_sethiwat(&pmap_pv_pool, 32 * 1024); + pool_setipl(&pmap_pv_pool, IPL_VM); /* * initialize the PDE pool. 
@@ -764,8 +771,10 @@ pmap_enter_pv(struct vm_page *pg, struct pve->pv_pmap = pmap; pve->pv_va = va; pve->pv_ptp = ptp; /* NULL for kernel pmap */ + mtx_enter(&pg->mdpage.pv_mtx); pve->pv_next = pg->mdpage.pv_list; /* add to ... */ pg->mdpage.pv_list = pve; /* ... list */ + mtx_leave(&pg->mdpage.pv_mtx); } /* @@ -780,6 +789,7 @@ pmap_remove_pv(struct vm_page *pg, struc { struct pv_entry *pve, **prevptr; + mtx_enter(&pg->mdpage.pv_mtx); prevptr = &pg->mdpage.pv_list; while ((pve = *prevptr) != NULL) { if (pve->pv_pmap == pmap && pve->pv_va == va) { /* match? */ @@ -788,6 +798,7 @@ pmap_remove_pv(struct vm_page *pg, struc } prevptr = &pve->pv_next; /* previous pointer */ } + mtx_leave(&pg->mdpage.pv_mtx); return(pve); /* return removed pve */ } @@ -1007,6 +1018,8 @@ pmap_create(void) pmap = pool_get(&pmap_pmap_pool, PR_WAITOK); + mtx_init(&pmap->pm_mtx, IPL_VM); + /* init uvm_object */ for (i = 0; i < PTP_LEVELS - 1; i++) { uvm_objinit(&pmap->pm_obj[i], NULL, 1); @@ -1049,7 +1062,7 @@ pmap_destroy(struct pmap *pmap) * drop reference count */ - refs = --pmap->pm_obj[0].uo_refs; + refs = atomic_sub_int_nv(&pmap->pm_obj[0].uo_refs, 1); if (refs > 0) { return; } @@ -1095,7 +1108,7 @@ pmap_destroy(struct pmap *pmap) void pmap_reference(struct pmap *pmap) { - pmap->pm_obj[0].uo_refs++; + atomic_inc_int(&pmap->pm_obj[0].uo_refs); } /* @@ -1422,7 +1435,6 @@ pmap_do_remove(struct pmap *pmap, vaddr_ pmap_map_ptes(pmap, &ptes, &pdes, &scr3); shootself = (scr3 == 0); - /* * removing one page? take shortcut function. */ @@ -1563,8 +1575,11 @@ pmap_page_remove(struct vm_page *pg) TAILQ_INIT(&empty_ptps); + mtx_enter(&pg->mdpage.pv_mtx); while ((pve = pg->mdpage.pv_list) != NULL) { pg->mdpage.pv_list = pve->pv_next; + pmap_reference(pve->pv_pmap); + mtx_leave(&pg->mdpage.pv_mtx); /* XXX use direct map? 
*/ pmap_map_ptes(pve->pv_pmap, &ptes, &pdes, &scr3); @@ -1604,8 +1619,11 @@ pmap_page_remove(struct vm_page *pg) } } pmap_unmap_ptes(pve->pv_pmap, scr3); + pmap_destroy(pve->pv_pmap); pool_put(&pmap_pv_pool, pve); + mtx_enter(&pg->mdpage.pv_mtx); } + mtx_leave(&pg->mdpage.pv_mtx); pmap_tlb_shootwait(); @@ -1640,12 +1658,14 @@ pmap_test_attrs(struct vm_page *pg, unsi return (TRUE); mybits = 0; + mtx_enter(&pg->mdpage.pv_mtx); for (pve = pg->mdpage.pv_list; pve != NULL && mybits == 0; pve = pve->pv_next) { level = pmap_find_pte_direct(pve->pv_pmap, pve->pv_va, &ptes, &offs); mybits |= (ptes[offs] & testbits); } + mtx_leave(&pg->mdpage.pv_mtx); if (mybits == 0) return (FALSE); @@ -1675,6 +1695,7 @@ pmap_clear_attrs(struct vm_page *pg, uns if (result) atomic_clearbits_int(&pg->pg_flags, clearflags); + mtx_enter(&pg->mdpage.pv_mtx); for (pve = pg->mdpage.pv_list; pve != NULL; pve = pve->pv_next) { level = pmap_find_pte_direct(pve->pv_pmap, pve->pv_va, &ptes, &offs); @@ -1686,6 +1707,7 @@ pmap_clear_attrs(struct vm_page *pg, uns pmap_is_curpmap(pve->pv_pmap)); } } + mtx_leave(&pg->mdpage.pv_mtx); pmap_tlb_shootwait(); @@ -1785,7 +1807,6 @@ pmap_write_protect(struct pmap *pmap, va pmap_tlb_shootrange(pmap, sva, eva, shootself); pmap_unmap_ptes(pmap, scr3); - pmap_tlb_shootwait(); } @@ -1874,7 +1895,7 @@ pmap_enter(struct pmap *pmap, vaddr_t va pt_entry_t *ptes, opte, npte; pd_entry_t **pdes; struct vm_page *ptp, *pg = NULL; - struct pv_entry *pve = NULL; + struct pv_entry *pve, *opve = NULL; int ptpdelta, wireddelta, resdelta; boolean_t wired = (flags & PMAP_WIRED) != 0; boolean_t nocache = (pa & PMAP_NOCACHE) != 0; @@ -1896,6 +1917,15 @@ pmap_enter(struct pmap *pmap, vaddr_t va #endif + pve = pool_get(&pmap_pv_pool, PR_NOWAIT); + if (pve == NULL) { + if (flags & PMAP_CANFAIL) { + error = ENOMEM; + goto out; + } + panic("%s: no pv entries available", __func__); + } + /* * map in ptes and get a pointer to our PTP (unless we are the kernel) */ @@ -1908,6 +1938,7 @@ 
pmap_enter(struct pmap *pmap, vaddr_t va ptp = pmap_get_ptp(pmap, va, pdes); if (ptp == NULL) { if (flags & PMAP_CANFAIL) { + pmap_unmap_ptes(pmap, scr3); error = ENOMEM; goto out; } @@ -1983,11 +2014,10 @@ pmap_enter(struct pmap *pmap, vaddr_t va __func__, pa, atop(pa)); #endif pmap_sync_flags_pte(pg, opte); - pve = pmap_remove_pv(pg, pmap, va); + opve = pmap_remove_pv(pg, pmap, va); pg = NULL; /* This is not the page we are looking for */ } } else { /* opte not valid */ - pve = NULL; resdelta = 1; if (wired) wireddelta = 1; @@ -2010,21 +2040,8 @@ pmap_enter(struct pmap *pmap, vaddr_t va pg = PHYS_TO_VM_PAGE(pa); if (pg != NULL) { - if (pve == NULL) { - pve = pool_get(&pmap_pv_pool, PR_NOWAIT); - if (pve == NULL) { - if (flags & PMAP_CANFAIL) { - error = ENOMEM; - goto out; - } - panic("%s: no pv entries available", __func__); - } - } pmap_enter_pv(pg, pve, pmap, va, ptp); - } else { - /* new mapping is not PG_PVLIST. free pve if we've got one */ - if (pve) - pool_put(&pmap_pv_pool, pve); + pve = NULL; } enter_now: @@ -2074,13 +2091,18 @@ enter_now: if (nocache && (opte & PG_N) == 0) wbinvd(); pmap_tlb_shootpage(pmap, va, shootself); - pmap_tlb_shootwait(); } + pmap_unmap_ptes(pmap, scr3); + pmap_tlb_shootwait(); + error = 0; out: - pmap_unmap_ptes(pmap, scr3); + if (pve) + pool_put(&pmap_pv_pool, pve); + if (opve) + pool_put(&pmap_pv_pool, opve); return error; } Index: arch/amd64/conf/GENERIC =================================================================== RCS file: /cvs/src/sys/arch/amd64/conf/GENERIC,v retrieving revision 1.384 diff -u -p -r1.384 GENERIC --- arch/amd64/conf/GENERIC 6 Feb 2015 19:49:56 -0000 1.384 +++ arch/amd64/conf/GENERIC 20 Feb 2015 05:24:40 -0000 @@ -105,7 +105,7 @@ uguru0 at isa? disable port 0xe0 # ABIT aps0 at isa? port 0x1600 # ThinkPad Active Protection System -piixpm* at pci? # Intel PIIX PM +piixpm* at pci? disable # Intel PIIX PM iic* at piixpm? ichiic* at pci? # Intel ICH SMBus controller iic* at ichiic? 
Index: arch/amd64/include/pmap.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/pmap.h,v retrieving revision 1.53 diff -u -p -r1.53 pmap.h --- arch/amd64/include/pmap.h 15 Feb 2015 21:34:33 -0000 1.53 +++ arch/amd64/include/pmap.h 20 Feb 2015 05:24:40 -0000 @@ -74,6 +74,7 @@ #include #include #endif /* _KERNEL */ +#include #include #include #endif @@ -284,15 +285,11 @@ LIST_HEAD(pmap_head, pmap); /* struct pm * * note that the pm_obj contains the reference count, * page list, and number of PTPs within the pmap. - * - * pm_lock is the same as the spinlock for vm object 0. Changes to - * the other objects may only be made if that lock has been taken - * (the other object locks are only used when uvm_pagealloc is called) */ struct pmap { + struct mutex pm_mtx; struct uvm_object pm_obj[PTP_LEVELS-1]; /* objects for lvl >= 1) */ -#define pm_lock pm_obj[0].vmobjlock #define pm_obj_l1 pm_obj[0] #define pm_obj_l2 pm_obj[1] #define pm_obj_l3 pm_obj[2] @@ -531,10 +528,12 @@ kvtopte(vaddr_t va) #ifndef _LOCORE struct pv_entry; struct vm_page_md { + struct mutex pv_mtx; struct pv_entry *pv_list; }; #define VM_MDPAGE_INIT(pg) do { \ + mtx_init(&(pg)->mdpage.pv_mtx, IPL_VM); \ (pg)->mdpage.pv_list = NULL; \ } while (0) #endif /* !_LOCORE */ Index: conf/GENERIC =================================================================== RCS file: /cvs/src/sys/conf/GENERIC,v retrieving revision 1.214 diff -u -p -r1.214 GENERIC --- conf/GENERIC 21 Jan 2015 02:23:14 -0000 1.214 +++ conf/GENERIC 20 Feb 2015 05:24:41 -0000 @@ -3,6 +3,8 @@ # Machine-independent option; used by all architectures for their # GENERIC kernel +option MSGBUFSIZE="64*NBPG" + #option INSECURE # default to secure option DDB # in-kernel debugger Index: dev/pci/if_myx.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_myx.c,v retrieving revision 1.74 diff -u -p -r1.74 if_myx.c --- dev/pci/if_myx.c 18 Feb 
2015 23:58:34 -0000 1.74 +++ dev/pci/if_myx.c 20 Feb 2015 05:24:41 -0000 @@ -1546,6 +1546,7 @@ myx_start(struct ifnet *ifp) struct myx_buf *mb, *firstmb; struct mbuf *m; u_int32_t offset = sc->sc_tx_ring_offset; + u_int free; u_int idx, firstidx; u_int8_t flags; @@ -1554,8 +1555,9 @@ myx_start(struct ifnet *ifp) IFQ_IS_EMPTY(&ifp->if_snd)) return; + free = atomic_swap_uint(&sc->sc_tx_free, 0); for (;;) { - if (sc->sc_tx_free <= sc->sc_tx_nsegs || + if (free <= sc->sc_tx_nsegs || (mb = myx_buf_get(&sc->sc_tx_buf_free)) == NULL) { SET(ifp->if_flags, IFF_OACTIVE); break; @@ -1574,7 +1576,7 @@ myx_start(struct ifnet *ifp) continue; } -#if NBPFILTER > 0 +#if 0 && NBPFILTER > 0 if (ifp->if_bpf) bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); #endif @@ -1587,9 +1589,10 @@ myx_start(struct ifnet *ifp) SIMPLEQ_INSERT_TAIL(&list, mb, mb_entry); - sc->sc_tx_free -= map->dm_nsegs + + free -= map->dm_nsegs + (map->dm_mapsize < 60 ? 1 : 0); } + atomic_add_int(&sc->sc_tx_free, free); /* post the first descriptor last */ firstmb = SIMPLEQ_FIRST(&list); @@ -1819,11 +1822,8 @@ myx_txeof(struct myx_softc *sc, u_int32_ myx_buf_put(&sc->sc_tx_buf_free, mb); } while (++sc->sc_tx_count != done_count); - if (free) { - KERNEL_LOCK(); - sc->sc_tx_free += free; - KERNEL_UNLOCK(); - } + if (free) + atomic_add_int(&sc->sc_tx_free, free); } void @@ -1889,7 +1889,9 @@ myx_rxeof(struct myx_softc *sc) } ifp->if_ipackets += ml_len(&ml); + ether_input_list(ifp, &ml); +#if 0 KERNEL_LOCK(); #if NBPFILTER > 0 if (ifp->if_bpf) { @@ -1901,6 +1903,7 @@ myx_rxeof(struct myx_softc *sc) while ((m = ml_dequeue(&ml)) != NULL) ether_input_mbuf(ifp, m); KERNEL_UNLOCK(); +#endif } void Index: kern/init_main.c =================================================================== RCS file: /cvs/src/sys/kern/init_main.c,v retrieving revision 1.235 diff -u -p -r1.235 init_main.c --- kern/init_main.c 10 Feb 2015 05:28:18 -0000 1.235 +++ kern/init_main.c 20 Feb 2015 05:24:41 -0000 @@ -123,6 +123,8 @@ extern struct timeout 
setperf_to; void setperf_auto(void *); #endif +extern struct timeout pool_gc_tick; + extern struct user *proc0paddr; struct vnode *rootvp, *swapdev_vp; @@ -548,6 +550,11 @@ main(void *framep) #ifndef SMALL_KERNEL timeout_set(&setperf_to, setperf_auto, NULL); #endif + + /* + * Start the idle pool page garbage collector + */ + timeout_add_sec(&pool_gc_tick, 1); /* * proc0: nothing to do, back to sleep Index: kern/init_sysent.c =================================================================== RCS file: /cvs/src/sys/kern/init_sysent.c,v retrieving revision 1.164 diff -u -p -r1.164 init_sysent.c --- kern/init_sysent.c 9 Feb 2015 13:55:04 -0000 1.164 +++ kern/init_sysent.c 20 Feb 2015 05:24:41 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: init_sysent.c,v 1.164 2015/02/09 13:55:04 deraadt Exp $ */ +/* $OpenBSD$ */ /* * System call switch table. @@ -179,7 +179,7 @@ struct sysent sysent[] = { sys_select }, /* 71 = select */ { 6, s(struct sys_kevent_args), 0, sys_kevent }, /* 72 = kevent */ - { 2, s(struct sys_munmap_args), 0, + { 2, s(struct sys_munmap_args), SY_NOLOCK | 0, sys_munmap }, /* 73 = munmap */ { 3, s(struct sys_mprotect_args), 0, sys_mprotect }, /* 74 = mprotect */ @@ -432,7 +432,7 @@ struct sysent sysent[] = { sys_setrlimit }, /* 195 = setrlimit */ { 0, 0, 0, sys_nosys }, /* 196 = obsolete ogetdirentries48 */ - { 7, s(struct sys_mmap_args), 0, + { 7, s(struct sys_mmap_args), SY_NOLOCK | 0, sys_mmap }, /* 197 = mmap */ { 0, 0, 0, sys_nosys }, /* 198 = __syscall (indir) */ Index: kern/kern_sysctl.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sysctl.c,v retrieving revision 1.283 diff -u -p -r1.283 kern_sysctl.c --- kern/kern_sysctl.c 11 Feb 2015 05:09:33 -0000 1.283 +++ kern/kern_sysctl.c 20 Feb 2015 05:24:41 -0000 @@ -509,7 +509,8 @@ kern_sysctl(int *name, u_int namelen, vo case KERN_NPROCS: return (sysctl_rdint(oldp, oldlenp, newp, nprocesses)); case KERN_POOL: - return (sysctl_dopool(name + 1, namelen - 1, oldp, 
oldlenp)); + return (sysctl_pool(name + 1, namelen - 1, oldp, oldlenp, + newp, newlen)); case KERN_STACKGAPRANDOM: stackgap = stackgap_random; error = sysctl_int(oldp, oldlenp, newp, newlen, &stackgap); Index: kern/subr_pool.c =================================================================== RCS file: /cvs/src/sys/kern/subr_pool.c,v retrieving revision 1.180 diff -u -p -r1.180 subr_pool.c --- kern/subr_pool.c 10 Feb 2015 06:16:13 -0000 1.180 +++ kern/subr_pool.c 20 Feb 2015 05:24:41 -0000 @@ -40,6 +40,8 @@ #include #include #include +#include +#include #include @@ -161,6 +163,14 @@ void pool_print1(struct pool *, const c #define pool_sleep(pl) msleep(pl, &pl->pr_mtx, PSWP, pl->pr_wchan, 0) +/* stale page garbage collectors */ +void pool_gc_sched(void *); +struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL); +void pool_gc_pages(void *); +struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL); +int pool_wait_free = 4; +int pool_wait_gc = 16; + static inline int phtree_compare(struct pool_item_header *a, struct pool_item_header *b) { @@ -233,7 +243,7 @@ pool_init(struct pool *pp, size_t size, size = roundup(size, align); if (palloc == NULL) { - while (size > pgsize) + while (size * 8 > pgsize) pgsize <<= 1; if (pgsize > PAGE_SIZE) { @@ -691,7 +701,7 @@ pool_put(struct pool *pp, void *v) /* is it time to free a page? */ if (pp->pr_nidle > pp->pr_maxpages && (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL && - (ticks - ph->ph_tick) > hz) { + (ticks - ph->ph_tick) > (hz * pool_wait_free)) { freeph = ph; pool_p_remove(pp, freeph); } @@ -1282,11 +1292,13 @@ pool_walk(struct pool *pp, int full, * kern.pool.name. - the name for pool#. 
*/ int -sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp) +sysctl_pool(int *name, u_int namelen, char *oldp, size_t *oldlenp, + void *newp, size_t newlen) { struct kinfo_pool pi; struct pool *pp; int rv = ENOENT; + int wait; switch (name[0]) { case KERN_POOL_NPOOLS: @@ -1297,6 +1309,35 @@ sysctl_dopool(int *name, u_int namelen, case KERN_POOL_NAME: case KERN_POOL_POOL: break; + + case KERN_POOL_WAITFREE: + if (namelen != 1) + return (ENOTDIR); + wait = pool_wait_free; + rv = sysctl_int(oldp, oldlenp, newp, newlen, &wait); + if (rv != 0) + return (rv); + if (newp == NULL) + return (0); + if (wait < 1 || wait >= pool_wait_gc) + return (EINVAL); + pool_wait_free = wait; + return (0); + + case KERN_POOL_WAITGC: + if (namelen != 1) + return (ENOTDIR); + wait = pool_wait_gc; + rv = sysctl_int(oldp, oldlenp, newp, newlen, &wait); + if (rv != 0) + return (rv); + if (newp == NULL) + return (0); + if (wait <= pool_wait_free || wait > 3600) + return (EINVAL); + pool_wait_gc = wait; + return (0); + default: return (EOPNOTSUPP); } @@ -1350,6 +1391,47 @@ done: rw_exit_read(&pool_lock); return (rv); +} + +void +pool_gc_sched(void *null) +{ + task_add(systqmp, &pool_gc_task); +} + +void +pool_gc_pages(void *null) +{ + extern int ticks; + struct pool *pp; + struct pool_item_header *ph, *freeph; + int s; + + rw_enter_read(&pool_lock); + s = splvm(); + SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { + if (pp->pr_nidle <= pp->pr_minpages || /* guess */ + !mtx_enter_try(&pp->pr_mtx)) /* try */ + continue; + + /* is it time to free a page? 
*/ + if (pp->pr_nidle > pp->pr_minpages && + (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL && + (ticks - ph->ph_tick) > (hz * pool_wait_gc)) { + freeph = ph; + pool_p_remove(pp, freeph); + } else + freeph = NULL; + + mtx_leave(&pp->pr_mtx); + + if (freeph != NULL) + pool_p_free(pp, freeph); + } + splx(s); /* XXX */ + rw_exit_read(&pool_lock); + + timeout_add_sec(&pool_gc_tick, 1); } /* Index: kern/syscalls.c =================================================================== RCS file: /cvs/src/sys/kern/syscalls.c,v retrieving revision 1.164 diff -u -p -r1.164 syscalls.c --- kern/syscalls.c 9 Feb 2015 13:55:04 -0000 1.164 +++ kern/syscalls.c 20 Feb 2015 05:24:41 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: syscalls.c,v 1.164 2015/02/09 13:55:04 deraadt Exp $ */ +/* $OpenBSD$ */ /* * System call names. Index: kern/syscalls.master =================================================================== RCS file: /cvs/src/sys/kern/syscalls.master,v retrieving revision 1.152 diff -u -p -r1.152 syscalls.master --- kern/syscalls.master 9 Feb 2015 13:54:23 -0000 1.152 +++ kern/syscalls.master 20 Feb 2015 05:24:41 -0000 @@ -170,7 +170,7 @@ const struct kevent *changelist, int nchanges, \ struct kevent *eventlist, int nevents, \ const struct timespec *timeout); } -73 STD { int sys_munmap(void *addr, size_t len); } +73 STD NOLOCK { int sys_munmap(void *addr, size_t len); } 74 STD { int sys_mprotect(void *addr, size_t len, \ int prot); } 75 STD { int sys_madvise(void *addr, size_t len, \ @@ -341,7 +341,7 @@ 195 STD { int sys_setrlimit(int which, \ const struct rlimit *rlp); } 196 OBSOL ogetdirentries48 -197 STD { void *sys_mmap(void *addr, size_t len, int prot, \ +197 STD NOLOCK { void *sys_mmap(void *addr, size_t len, int prot, \ int flags, int fd, long pad, off_t pos); } 198 INDIR { quad_t sys___syscall(quad_t num, ...); } 199 STD { off_t sys_lseek(int fd, int pad, off_t offset, \ Index: kern/uipc_mbuf.c =================================================================== RCS file: 
/cvs/src/sys/kern/uipc_mbuf.c,v retrieving revision 1.201 diff -u -p -r1.201 uipc_mbuf.c --- kern/uipc_mbuf.c 7 Feb 2015 02:52:09 -0000 1.201 +++ kern/uipc_mbuf.c 20 Feb 2015 05:24:41 -0000 @@ -156,7 +156,7 @@ mbinit(void) for (i = 0; i < nitems(mclsizes); i++) { snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk", mclsizes[i] >> 10); - pool_init(&mclpools[i], mclsizes[i], 0, 0, 0, + pool_init(&mclpools[i], mclsizes[i], 1024, 0, 0, mclnames[i], NULL); pool_setipl(&mclpools[i], IPL_NET); pool_set_constraints(&mclpools[i], &kp_dma_contig); @@ -169,6 +169,7 @@ mbinit(void) void nmbclust_update(void) { +#if 0 int i; /* * Set the hard limit on the mclpools to the number of @@ -187,6 +188,7 @@ nmbclust_update(void) */ pool_sethiwat(&mclpools[i], nmbclust); } +#endif pool_sethiwat(&mbpool, nmbclust); } Index: net/hfsc.c =================================================================== RCS file: /cvs/src/sys/net/hfsc.c,v retrieving revision 1.15 diff -u -p -r1.15 hfsc.c --- net/hfsc.c 8 Feb 2015 03:16:16 -0000 1.15 +++ net/hfsc.c 20 Feb 2015 05:24:41 -0000 @@ -121,6 +121,7 @@ struct hfsc_class *hfsc_clh2cph(struct h #define HFSC_HT_INFINITY 0xffffffffffffffffLL /* infinite time value */ +#if 0 u_int64_t hfsc_microuptime(void) { @@ -130,6 +131,7 @@ hfsc_microuptime(void) return (((u_int64_t)(tv.tv_sec) * 1000000 + tv.tv_usec) << HFSC_CLK_SHIFT); } +#endif static inline u_int hfsc_more_slots(u_int current) Index: net/hfsc.h =================================================================== RCS file: /cvs/src/sys/net/hfsc.h,v retrieving revision 1.6 diff -u -p -r1.6 hfsc.h --- net/hfsc.h 19 Apr 2014 16:02:17 -0000 1.6 +++ net/hfsc.h 20 Feb 2015 05:24:41 -0000 @@ -34,6 +34,7 @@ #define _HFSC_H_ #include +#include /* hfsc class flags */ #define HFSC_RED 0x0001 /* use RED */ @@ -250,7 +251,8 @@ struct hfsc_if { }; #define HFSC_CLK_SHIFT 8 -#define HFSC_FREQ (1000000 << HFSC_CLK_SHIFT) +//#define HFSC_FREQ (1000000 << HFSC_CLK_SHIFT) +#define HFSC_FREQ (curcpu()->ci_tsc_freq) 
#define HFSC_CLK_PER_TICK (HFSC_FREQ / hz) #define HFSC_ENABLED(ifq) ((ifq)->ifq_hfsc != NULL) @@ -263,7 +265,8 @@ int hfsc_detach(struct ifnet *); void hfsc_purge(struct ifqueue *); int hfsc_enqueue(struct ifqueue *, struct mbuf *); struct mbuf *hfsc_dequeue(struct ifqueue *, int); -u_int64_t hfsc_microuptime(void); +//u_int64_t hfsc_microuptime(void); +#define hfsc_microuptime() rdtsc(); int hfsc_addqueue(struct pf_queuespec *); int hfsc_delqueue(struct pf_queuespec *); int hfsc_qstats(struct pf_queuespec *, void *, int *); Index: net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.319 diff -u -p -r1.319 if.c --- net/if.c 9 Feb 2015 03:09:57 -0000 1.319 +++ net/if.c 20 Feb 2015 05:24:41 -0000 @@ -127,7 +127,8 @@ void if_attachsetup(struct ifnet *); void if_attachdomain1(struct ifnet *); void if_attach_common(struct ifnet *); -void if_detach_queues(struct ifnet *, struct ifqueue *); +int if_detach_filter(void *, const struct mbuf *); +void if_detach_queues(struct ifnet *, struct niqueue *); void if_detached_start(struct ifnet *); int if_detached_ioctl(struct ifnet *, u_long, caddr_t); @@ -139,7 +140,6 @@ int if_setgroupattribs(caddr_t); int if_clone_list(struct if_clonereq *); struct if_clone *if_clone_lookup(const char *, int *); -void if_congestion_clear(void *); int if_group_egress_build(void); void if_link_state_change_task(void *); @@ -155,6 +155,11 @@ int if_cloners_count; struct timeout net_tick_to; void net_tick(void *); int net_livelocked(void); +int ifq_congestion; + +struct timeout net_tick_to; +void net_tick(void *); +struct taskq *softnettq; /* * Network interface utility routines. 
@@ -167,6 +172,11 @@ ifinit() { timeout_set(&net_tick_to, net_tick, &net_tick_to); + softnettq = taskq_create("softnet", 1, IPL_NET, + TASKQ_MPSAFE | TASKQ_CANTSLEEP); + if (softnettq == NULL) + panic("unable to create softnet taskq"); + net_tick(&net_tick_to); } @@ -415,7 +425,7 @@ if_start(struct ifnet *ifp) splassert(IPL_NET); - if (ifp->if_snd.ifq_len >= min(8, ifp->if_snd.ifq_maxlen) && + if (ifp->if_snd.ifq_len >= ifp->if_snd.ifq_maxlen && !ISSET(ifp->if_flags, IFF_OACTIVE)) { if (ISSET(ifp->if_xflags, IFXF_TXREADY)) { TAILQ_REMOVE(&iftxlist, ifp, if_txlist); @@ -541,7 +551,7 @@ if_detach(struct ifnet *ifp) */ #define IF_DETACH_QUEUES(x) \ do { \ - extern struct ifqueue x; \ + extern struct niqueue x; \ if_detach_queues(ifp, & x); \ } while (0) IF_DETACH_QUEUES(arpintrq); @@ -593,38 +603,31 @@ do { \ splx(s); } -void -if_detach_queues(struct ifnet *ifp, struct ifqueue *q) +int +if_detach_filter(void *ctx, const struct mbuf *m) { - struct mbuf *m, *prev = NULL, *next; - int prio; + struct ifnet *ifp = ctx; - for (prio = 0; prio <= IFQ_MAXPRIO; prio++) { - for (m = q->ifq_q[prio].head; m; m = next) { - next = m->m_nextpkt; #ifdef DIAGNOSTIC - if ((m->m_flags & M_PKTHDR) == 0) { - prev = m; - continue; - } + if ((m->m_flags & M_PKTHDR) == 0) + return (0); #endif - if (m->m_pkthdr.rcvif != ifp) { - prev = m; - continue; - } - if (prev) - prev->m_nextpkt = m->m_nextpkt; - else - q->ifq_q[prio].head = m->m_nextpkt; - if (q->ifq_q[prio].tail == m) - q->ifq_q[prio].tail = prev; - q->ifq_len--; - - m->m_nextpkt = NULL; - m_freem(m); - IF_DROP(q); - } + return (m->m_pkthdr.rcvif == ifp); +} + +void +if_detach_queues(struct ifnet *ifp, struct niqueue *niq) +{ + struct mbuf *m0, *m; + + m0 = niq_filter(niq, if_detach_filter, ifp); + while (m0 != NULL) { + m = m0; + m0 = m->m_nextpkt; + + m->m_nextpkt = NULL; + m_freem(m); } } @@ -787,33 +790,22 @@ if_clone_list(struct if_clonereq *ifcr) } /* - * set queue congestion marker and register timeout to clear it + * set queue 
congestion marker */ void -if_congestion(struct ifqueue *ifq) +if_congestion(void) { - /* Not currently needed, all callers check this */ - if (ifq->ifq_congestion) - return; + extern int ticks; - ifq->ifq_congestion = malloc(sizeof(struct timeout), M_TEMP, M_NOWAIT); - if (ifq->ifq_congestion == NULL) - return; - timeout_set(ifq->ifq_congestion, if_congestion_clear, ifq); - timeout_add(ifq->ifq_congestion, hz / 100); + ifq_congestion = ticks; } -/* - * clear the congestion flag - */ -void -if_congestion_clear(void *arg) +int +if_congested(void) { - struct ifqueue *ifq = arg; - struct timeout *to = ifq->ifq_congestion; + extern int ticks; - ifq->ifq_congestion = NULL; - free(to, M_TEMP, sizeof(*to)); + return (ticks - ifq_congestion <= (hz / 100)); } #define equal(a1, a2) \ @@ -2143,6 +2135,28 @@ sysctl_ifq(int *name, u_int namelen, voi /* NOTREACHED */ } +int +sysctl_niq(int *name, u_int namelen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen, struct niqueue *niq) +{ + /* All sysctl names at this level are terminal. */ + if (namelen != 1) + return (ENOTDIR); + + switch (name[0]) { + case IFQCTL_LEN: + return (sysctl_rdint(oldp, oldlenp, newp, niq_len(niq))); + case IFQCTL_MAXLEN: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &niq->ni_q.mq_maxlen)); /* XXX */ + case IFQCTL_DROPS: + return (sysctl_rdint(oldp, oldlenp, newp, niq_drops(niq))); + default: + return (EOPNOTSUPP); + } + /* NOTREACHED */ +} + void ifa_add(struct ifnet *ifp, struct ifaddr *ifa) { @@ -2295,7 +2309,7 @@ if_rxr_adjust_cwm(struct if_rxring *rxr) rxr->rxr_cwm--; else return; - } else if (rxr->rxr_alive >= rxr->rxr_lwm) + } else if (rxr->rxr_alive > 4) return; else if (rxr->rxr_cwm < rxr->rxr_hwm) rxr->rxr_cwm++; @@ -2361,4 +2375,43 @@ if_rxr_ioctl(struct if_rxrinfo *ifri, co ifr.ifr_info = *rxr; return (if_rxr_info_ioctl(ifri, 1, &ifr)); +} + +/* + * Network stack input queues. 
+ */ + +void +niq_init(struct niqueue *niq, u_int maxlen, u_int isr) +{ + mq_init(&niq->ni_q, maxlen, IPL_NET); + niq->ni_isr = isr; +} + +int +niq_enqueue(struct niqueue *niq, struct mbuf *m) +{ + int rv; + + rv = mq_enqueue(&niq->ni_q, m); + if (rv == 0) + schednetisr(niq->ni_isr); + else + if_congestion(); + + return (rv); +} + +int +niq_enlist(struct niqueue *niq, struct mbuf_list *ml) +{ + int rv; + + rv = mq_enlist(&niq->ni_q, ml); + if (rv == 0) + schednetisr(niq->ni_isr); + else + if_congestion(); + + return (rv); } Index: net/if.h =================================================================== RCS file: /cvs/src/sys/net/if.h,v retrieving revision 1.160 diff -u -p -r1.160 if.h --- net/if.h 8 Feb 2015 06:00:52 -0000 1.160 +++ net/if.h 20 Feb 2015 05:24:41 -0000 @@ -463,6 +463,8 @@ void if_group_routechange(struct sockadd struct ifnet *ifunit(const char *); struct ifnet *if_get(unsigned int); void ifnewlladdr(struct ifnet *); +void if_congestion(void); +int if_congested(void); #endif /* _KERNEL */ Index: net/if_ethersubr.c =================================================================== RCS file: /cvs/src/sys/net/if_ethersubr.c,v retrieving revision 1.189 diff -u -p -r1.189 if_ethersubr.c --- net/if_ethersubr.c 16 Feb 2015 18:24:02 -0000 1.189 +++ net/if_ethersubr.c 20 Feb 2015 05:24:41 -0000 @@ -86,6 +86,8 @@ didn't get a copy, you may request one f #include #include #include +#include +#include #include /* required by if_trunk.h */ @@ -450,6 +452,55 @@ bad: return (error); } +void ether_input_process(void *); + +struct mbuf_queue ether_input_queue = MBUF_QUEUE_INITIALIZER(8192, IPL_NET); +struct task ether_input_task = + TASK_INITIALIZER(ether_input_process, &ether_input_queue); + +void +ether_input_list(struct ifnet *ifp, struct mbuf_list *ml) +{ + mq_enlist(&ether_input_queue, ml); + task_add(softnettq, &ether_input_task); +} + +void +ether_input_mbuf(struct ifnet *ifp, struct mbuf *m) +{ + mq_enqueue(&ether_input_queue, m); + task_add(softnettq, 
&ether_input_task); +} + +void +ether_input_process(void *xmq) +{ + struct mbuf_queue *mq = xmq; + struct mbuf *m0, *m; + u_int mit = 0; + int s; + + m0 = mq_dechain(mq); + if (m0 == NULL) + return; + + KERNEL_LOCK(); + s = splnet(); + do { + if ((++mit & 0x7f) == 0) + yield(); + + m = m0; + m0 = m->m_nextpkt; + m->m_nextpkt = NULL; + + ether_input(m->m_pkthdr.rcvif, NULL, m); + } while (m0 != NULL); + splx(s); + KERNEL_UNLOCK(); +} + + /* * Process a received Ethernet packet; * the packet is in the mbuf chain m without @@ -459,9 +510,9 @@ int ether_input(struct ifnet *ifp0, void *hdr, struct mbuf *m) { struct ether_header *eh = hdr; - struct ifqueue *inq; + struct niqueue *inq; u_int16_t etype; - int s, llcfound = 0; + int llcfound = 0; struct llc *l; struct arpcom *ac; struct ifnet *ifp = ifp0; @@ -607,22 +658,15 @@ ether_input(struct ifnet *ifp0, void *hd } } - /* - * Schedule softnet interrupt and enqueue packet within the same spl. - */ - s = splnet(); decapsulate: - switch (etype) { case ETHERTYPE_IP: - schednetisr(NETISR_IP); inq = &ipintrq; break; case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) goto dropanyway; - schednetisr(NETISR_ARP); inq = &arpintrq; break; @@ -630,14 +674,13 @@ decapsulate: if (ifp->if_flags & IFF_NOARP) goto dropanyway; revarpinput(m); /* XXX queue? */ - goto done; + return (1); #ifdef INET6 /* * Schedule IPv6 software interrupt for incoming IPv6 packet. 
*/ case ETHERTYPE_IPV6: - schednetisr(NETISR_IPV6); inq = &ip6intrq; break; #endif /* INET6 */ @@ -645,14 +688,12 @@ decapsulate: case ETHERTYPE_PPPOEDISC: case ETHERTYPE_PPPOE: #ifndef PPPOE_SERVER - if (m->m_flags & (M_MCAST | M_BCAST)) { - m_freem(m); - goto done; - } + if (m->m_flags & (M_MCAST | M_BCAST)) + goto dropanyway; #endif M_PREPEND(m, sizeof(*eh), M_DONTWAIT); if (m == NULL) - goto done; + return (1); eh_tmp = mtod(m, struct ether_header *); /* @@ -667,7 +708,7 @@ decapsulate: if ((session = pipex_pppoe_lookup_session(m)) != NULL) { pipex_pppoe_input(m, session); - goto done; + return (1); } } #endif @@ -675,15 +716,12 @@ decapsulate: inq = &pppoediscinq; else inq = &pppoeinq; - - schednetisr(NETISR_PPPOE); break; #endif #ifdef MPLS case ETHERTYPE_MPLS: case ETHERTYPE_MPLS_MCAST: inq = &mplsintrq; - schednetisr(NETISR_MPLS); break; #endif default: @@ -702,21 +740,19 @@ decapsulate: m_adj(m, 6); M_PREPEND(m, sizeof(*eh), M_DONTWAIT); if (m == NULL) - goto done; + return (1); *mtod(m, struct ether_header *) = *eh; goto decapsulate; } - goto dropanyway; - dropanyway: default: - m_freem(m); - goto done; + goto dropanyway; } } - IF_INPUT_ENQUEUE(inq, m); -done: - splx(s); + niq_enqueue(inq, m); + return (1); +dropanyway: + m_freem(m); return (1); } Index: net/if_loop.c =================================================================== RCS file: /cvs/src/sys/net/if_loop.c,v retrieving revision 1.63 diff -u -p -r1.63 if_loop.c --- net/if_loop.c 27 Jan 2015 10:20:31 -0000 1.63 +++ net/if_loop.c 20 Feb 2015 05:24:41 -0000 @@ -204,8 +204,7 @@ int looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct rtentry *rt) { - int s, isr; - struct ifqueue *ifq = 0; + struct niqueue *ifq = NULL; if ((m->m_flags & M_PKTHDR) == 0) panic("looutput: no header mbuf"); @@ -232,18 +231,15 @@ looutput(struct ifnet *ifp, struct mbuf case AF_INET: ifq = &ipintrq; - isr = NETISR_IP; break; #ifdef INET6 case AF_INET6: ifq = &ip6intrq; - isr = NETISR_IPV6; break; 
#endif /* INET6 */ #ifdef MPLS case AF_MPLS: ifq = &mplsintrq; - isr = NETISR_MPLS; break; #endif /* MPLS */ default: @@ -252,18 +248,10 @@ looutput(struct ifnet *ifp, struct mbuf m_freem(m); return (EAFNOSUPPORT); } - s = splnet(); - if (IF_QFULL(ifq)) { - IF_DROP(ifq); - m_freem(m); - splx(s); + + if (niq_enqueue(ifq, m) != 0) return (ENOBUFS); - } - IF_ENQUEUE(ifq, m); - schednetisr(isr); - ifp->if_ipackets++; - ifp->if_ibytes += m->m_pkthdr.len; - splx(s); + return (0); } Index: net/if_mpe.c =================================================================== RCS file: /cvs/src/sys/net/if_mpe.c,v retrieving revision 1.41 diff -u -p -r1.41 if_mpe.c --- net/if_mpe.c 22 Dec 2014 11:05:53 -0000 1.41 +++ net/if_mpe.c 20 Feb 2015 05:24:41 -0000 @@ -355,7 +355,7 @@ mpe_input(struct mbuf *m, struct ifnet * u_int8_t ttl) { struct ip *ip; - int s, hlen; + int hlen; /* label -> AF lookup */ @@ -392,10 +392,8 @@ mpe_input(struct mbuf *m, struct ifnet * if (ifp && ifp->if_bpf) bpf_mtap_af(ifp->if_bpf, AF_INET, m, BPF_DIRECTION_IN); #endif - s = splnet(); - IF_INPUT_ENQUEUE(&ipintrq, m); - schednetisr(NETISR_IP); - splx(s); + + niq_enqueue(&ipintrq, m); } #ifdef INET6 @@ -404,7 +402,6 @@ mpe_input6(struct mbuf *m, struct ifnet u_int8_t ttl) { struct ip6_hdr *ip6hdr; - int s; /* label -> AF lookup */ @@ -427,10 +424,8 @@ mpe_input6(struct mbuf *m, struct ifnet if (ifp && ifp->if_bpf) bpf_mtap_af(ifp->if_bpf, AF_INET6, m, BPF_DIRECTION_IN); #endif - s = splnet(); - IF_INPUT_ENQUEUE(&ip6intrq, m); - schednetisr(NETISR_IPV6); - splx(s); + + niq_enqueue(&ip6intrq, m); } #endif /* INET6 */ Index: net/if_ppp.c =================================================================== RCS file: /cvs/src/sys/net/if_ppp.c,v retrieving revision 1.80 diff -u -p -r1.80 if_ppp.c --- net/if_ppp.c 19 Dec 2014 17:14:39 -0000 1.80 +++ net/if_ppp.c 20 Feb 2015 05:24:41 -0000 @@ -232,7 +232,7 @@ ppp_clone_create(struct if_clone *ifc, i sc->sc_if.if_output = pppoutput; sc->sc_if.if_start = ppp_ifstart; 
IFQ_SET_MAXLEN(&sc->sc_if.if_snd, IFQ_MAXLEN); - IFQ_SET_MAXLEN(&sc->sc_inq, IFQ_MAXLEN); + mq_init(&sc->sc_inq, IFQ_MAXLEN, IPL_NET); IFQ_SET_MAXLEN(&sc->sc_fastq, IFQ_MAXLEN); IFQ_SET_MAXLEN(&sc->sc_rawq, IFQ_MAXLEN); IFQ_SET_READY(&sc->sc_if.if_snd); @@ -329,12 +329,8 @@ pppdealloc(struct ppp_softc *sc) break; m_freem(m); } - for (;;) { - IF_DEQUEUE(&sc->sc_inq, m); - if (m == NULL) - break; + while ((m = mq_dequeue(&sc->sc_inq)) != NULL) m_freem(m); - } for (;;) { IF_DEQUEUE(&sc->sc_fastq, m); if (m == NULL) @@ -398,7 +394,7 @@ pppioctl(struct ppp_softc *sc, u_long cm switch (cmd) { case FIONREAD: - *(int *)data = IFQ_LEN(&sc->sc_inq); + *(int *)data = mq_len(&sc->sc_inq); break; case PPPIOCGUNIT: @@ -1225,7 +1221,6 @@ static void ppp_inproc(struct ppp_softc *sc, struct mbuf *m) { struct ifnet *ifp = &sc->sc_if; - struct ifqueue *inq; int s, ilen, xlen, proto, rv; u_char *cp, adrs, ctrl; struct mbuf *mp, *dmp = NULL; @@ -1462,45 +1457,33 @@ ppp_inproc(struct ppp_softc *sc, struct m->m_pkthdr.len -= PPP_HDRLEN; m->m_data += PPP_HDRLEN; m->m_len -= PPP_HDRLEN; - schednetisr(NETISR_IP); - inq = &ipintrq; + + if (niq_enqueue(&ipintrq, m) != 0) { + ifp->if_iqdrops++; + goto dropped; + } break; default: /* * Some other protocol - place on input queue for read(). */ - inq = &sc->sc_inq; - rv = 1; + if (mq_enqueue(&sc->sc_inq, m) != 0) { + ifp->if_iqdrops++; + goto dropped; + } else + (*sc->sc_ctlp)(sc); break; } - /* - * Put the packet on the appropriate input queue. 
- */ - s = splnet(); - if (IF_QFULL(inq)) { - IF_DROP(inq); - splx(s); - if (sc->sc_flags & SC_DEBUG) - printf("%s: input queue full\n", ifp->if_xname); - ifp->if_iqdrops++; - if (!inq->ifq_congestion) - if_congestion(inq); - goto bad; - } - IF_ENQUEUE(inq, m); - splx(s); ifp->if_ipackets++; ifp->if_ibytes += ilen; - if (rv) - (*sc->sc_ctlp)(sc); - return; bad: m_freem(m); + dropped: sc->sc_if.if_ierrors++; sc->sc_stats.ppp_ierrors++; } Index: net/if_pppoe.c =================================================================== RCS file: /cvs/src/sys/net/if_pppoe.c,v retrieving revision 1.43 diff -u -p -r1.43 if_pppoe.c --- net/if_pppoe.c 5 Dec 2014 15:50:04 -0000 1.43 +++ net/if_pppoe.c 20 Feb 2015 05:24:41 -0000 @@ -147,8 +147,8 @@ struct pppoe_softc { }; /* incoming traffic will be queued here */ -struct ifqueue pppoediscinq; -struct ifqueue pppoeinq; +struct niqueue pppoediscinq = NIQUEUE_INITIALIZER(IFQ_MAXLEN, NETISR_PPPOE); +struct niqueue pppoeinq = NIQUEUE_INITIALIZER(IFQ_MAXLEN, NETISR_PPPOE); /* input routines */ static void pppoe_disc_input(struct mbuf *); @@ -201,9 +201,6 @@ pppoeattach(int count) { LIST_INIT(&pppoe_softc_list); if_clone_attach(&pppoe_cloner); - - IFQ_SET_MAXLEN(&pppoediscinq, IFQ_MAXLEN); - IFQ_SET_MAXLEN(&pppoeinq, IFQ_MAXLEN); } /* Create a new interface. */ @@ -360,27 +357,14 @@ void pppoeintr(void) { struct mbuf *m; - int s; splsoftassert(IPL_SOFTNET); - - for (;;) { - s = splnet(); - IF_DEQUEUE(&pppoediscinq, m); - splx(s); - if (m == NULL) - break; + + while ((m = niq_dequeue(&pppoediscinq)) != NULL) pppoe_disc_input(m); - } - for (;;) { - s = splnet(); - IF_DEQUEUE(&pppoeinq, m); - splx(s); - if (m == NULL) - break; + while ((m = niq_dequeue(&pppoeinq)) != NULL) pppoe_data_input(m); - } } /* Analyze and handle a single received packet while not in session state. 
*/ Index: net/if_pppoe.h =================================================================== RCS file: /cvs/src/sys/net/if_pppoe.h,v retrieving revision 1.5 diff -u -p -r1.5 if_pppoe.h --- net/if_pppoe.h 28 Aug 2008 13:10:54 -0000 1.5 +++ net/if_pppoe.h 20 Feb 2015 05:24:41 -0000 @@ -66,8 +66,8 @@ struct pppoeconnectionstate { #ifdef _KERNEL -extern struct ifqueue pppoediscinq; -extern struct ifqueue pppoeinq; +extern struct niqueue pppoediscinq; +extern struct niqueue pppoeinq; void pppoeintr(void); Index: net/if_pppvar.h =================================================================== RCS file: /cvs/src/sys/net/if_pppvar.h,v retrieving revision 1.15 diff -u -p -r1.15 if_pppvar.h --- net/if_pppvar.h 7 Dec 2003 15:41:27 -0000 1.15 +++ net/if_pppvar.h 20 Feb 2015 05:24:41 -0000 @@ -98,7 +98,7 @@ struct ppp_softc { u_int16_t sc_mru; /* max receive unit */ pid_t sc_xfer; /* used in transferring unit */ struct ifqueue sc_rawq; /* received packets */ - struct ifqueue sc_inq; /* queue of input packets for daemon */ + struct mbuf_queue sc_inq; /* queue of input packets for daemon */ struct ifqueue sc_fastq; /* interactive output packet q */ struct mbuf *sc_togo; /* output packet ready to go */ struct mbuf *sc_npqueue; /* output packets not to be sent yet */ Index: net/if_pppx.c =================================================================== RCS file: /cvs/src/sys/net/if_pppx.c,v retrieving revision 1.36 diff -u -p -r1.36 if_pppx.c --- net/if_pppx.c 10 Feb 2015 21:56:10 -0000 1.36 +++ net/if_pppx.c 20 Feb 2015 05:24:41 -0000 @@ -317,9 +317,9 @@ pppxwrite(dev_t dev, struct uio *uio, in /* struct pppx_dev *pxd = pppx_dev2pxd(dev); */ struct pppx_hdr *th; struct mbuf *top, **mp, *m; - struct ifqueue *ifq; + struct niqueue *ifq; int tlen, mlen; - int isr, s, error = 0; + int error = 0; if (uio->uio_resid < sizeof(*th) || uio->uio_resid > MCLBYTES) return (EMSGSIZE); @@ -381,12 +381,10 @@ pppxwrite(dev_t dev, struct uio *uio, in switch (ntohl(th->pppx_proto)) { case 
AF_INET: ifq = &ipintrq; - isr = NETISR_IP; break; #ifdef INET6 case AF_INET6: ifq = &ip6intrq; - isr = NETISR_IPV6; break; #endif default: @@ -394,16 +392,8 @@ pppxwrite(dev_t dev, struct uio *uio, in return (EAFNOSUPPORT); } - s = splnet(); - if (IF_QFULL(ifq)) { - IF_DROP(ifq); - splx(s); - m_freem(top); + if (niq_enqueue(ifq, top) != 0) return (ENOBUFS); - } - IF_ENQUEUE(ifq, top); - schednetisr(isr); - splx(s); return (error); } Index: net/if_spppsubr.c =================================================================== RCS file: /cvs/src/sys/net/if_spppsubr.c,v retrieving revision 1.130 diff -u -p -r1.130 if_spppsubr.c --- net/if_spppsubr.c 27 Jan 2015 03:17:36 -0000 1.130 +++ net/if_spppsubr.c 20 Feb 2015 05:24:41 -0000 @@ -437,11 +437,10 @@ void sppp_input(struct ifnet *ifp, struct mbuf *m) { struct ppp_header ht; - struct ifqueue *inq = 0; + struct niqueue *inq = NULL; struct sppp *sp = (struct sppp *)ifp; struct timeval tv; int debug = ifp->if_flags & IFF_DEBUG; - int s; if (ifp->if_flags & IFF_UP) { /* Count received bytes, add hardware framing */ @@ -458,9 +457,10 @@ sppp_input(struct ifnet *ifp, struct mbu SPP_FMT "input packet is too small, %d bytes\n", SPP_ARGS(ifp), m->m_pkthdr.len); drop: + m_freem (m); + dropped: ++ifp->if_ierrors; ++ifp->if_iqdrops; - m_freem (m); return; } @@ -538,7 +538,6 @@ sppp_input(struct ifnet *ifp, struct mbu return; case PPP_IP: if (sp->state[IDX_IPCP] == STATE_OPENED) { - schednetisr (NETISR_IP); inq = &ipintrq; sp->pp_last_activity = tv.tv_sec; } @@ -551,7 +550,6 @@ sppp_input(struct ifnet *ifp, struct mbu return; case PPP_IPV6: if (sp->state[IDX_IPV6CP] == STATE_OPENED) { - schednetisr (NETISR_IPV6); inq = &ip6intrq; sp->pp_last_activity = tv.tv_sec; } @@ -580,12 +578,10 @@ sppp_input(struct ifnet *ifp, struct mbu m_freem (m); return; case ETHERTYPE_IP: - schednetisr (NETISR_IP); inq = &ipintrq; break; #ifdef INET6 case ETHERTYPE_IPV6: - schednetisr (NETISR_IPV6); inq = &ip6intrq; break; #endif @@ -605,21 +601,13 @@ 
sppp_input(struct ifnet *ifp, struct mbu if (! (ifp->if_flags & IFF_UP) || ! inq) goto drop; - /* Check queue. */ - s = splnet(); - if (IF_QFULL (inq)) { + if (niq_enqueue(inq, m) != 0) { /* Queue overflow. */ - IF_DROP(inq); - splx(s); if (debug) log(LOG_DEBUG, SPP_FMT "protocol queue overflow\n", SPP_ARGS(ifp)); - if (!inq->ifq_congestion) - if_congestion(inq); - goto drop; + goto dropped; } - IF_ENQUEUE(inq, m); - splx(s); } /* Index: net/if_tun.c =================================================================== RCS file: /cvs/src/sys/net/if_tun.c,v retrieving revision 1.132 diff -u -p -r1.132 if_tun.c --- net/if_tun.c 10 Feb 2015 21:56:10 -0000 1.132 +++ net/if_tun.c 20 Feb 2015 05:24:41 -0000 @@ -781,10 +781,9 @@ tunwrite(dev_t dev, struct uio *uio, int { struct tun_softc *tp; struct ifnet *ifp; - struct ifqueue *ifq; + struct niqueue *ifq; u_int32_t *th; struct mbuf *top, **mp, *m; - int isr; int error=0, s, tlen, mlen; if ((tp = tun_lookup(minor(dev))) == NULL) @@ -888,12 +887,10 @@ tunwrite(dev_t dev, struct uio *uio, int switch (ntohl(*th)) { case AF_INET: ifq = &ipintrq; - isr = NETISR_IP; break; #ifdef INET6 case AF_INET6: ifq = &ip6intrq; - isr = NETISR_IPV6; break; #endif default: @@ -901,21 +898,14 @@ tunwrite(dev_t dev, struct uio *uio, int return (EAFNOSUPPORT); } - s = splnet(); - if (IF_QFULL(ifq)) { - IF_DROP(ifq); - splx(s); + if (niq_enqueue(ifq, top) != 0) { ifp->if_collisions++; - m_freem(top); - if (!ifq->ifq_congestion) - if_congestion(ifq); return (ENOBUFS); } - IF_ENQUEUE(ifq, top); - schednetisr(isr); + ifp->if_ipackets++; ifp->if_ibytes += top->m_pkthdr.len; - splx(s); + return (error); } Index: net/if_var.h =================================================================== RCS file: /cvs/src/sys/net/if_var.h,v retrieving revision 1.20 diff -u -p -r1.20 if_var.h --- net/if_var.h 9 Feb 2015 03:09:57 -0000 1.20 +++ net/if_var.h 20 Feb 2015 05:24:41 -0000 @@ -37,6 +37,8 @@ #define _NET_IF_VAR_H_ #include +#include +#include #ifdef _KERNEL 
#include #endif @@ -68,8 +70,6 @@ #include -struct mbuf; -struct mbuf_list; struct proc; struct rtentry; struct socket; @@ -108,7 +108,7 @@ struct ifqueue { int ifq_maxlen; int ifq_drops; struct hfsc_if *ifq_hfsc; - struct timeout *ifq_congestion; + struct mutex ifq_mtx; }; /* @@ -320,8 +320,7 @@ do { \ if (IF_QFULL(ifq)) { \ IF_DROP(ifq); \ m_freem(m); \ - if (!(ifq)->ifq_congestion) \ - if_congestion(ifq); \ + if_congestion(); \ } else \ IF_ENQUEUE(ifq, m); \ } while (/* CONSTCOND */0) @@ -393,14 +392,37 @@ do { \ #define IF_WIRED_DEFAULT_PRIORITY 0 #define IF_WIRELESS_DEFAULT_PRIORITY 4 +/* + * Network stack input queues. + */ +struct niqueue { + struct mbuf_queue ni_q; + u_int ni_isr; +}; + +#define NIQUEUE_INITIALIZER(_len, _isr) \ + { MBUF_QUEUE_INITIALIZER((_len), IPL_NET), (_isr) } + +void niq_init(struct niqueue *, u_int, u_int); +int niq_enqueue(struct niqueue *, struct mbuf *); +int niq_enlist(struct niqueue *, struct mbuf_list *); + +#define niq_dequeue(_q) mq_dequeue(&(_q)->ni_q) +#define niq_dechain(_q) mq_dechain(&(_q)->ni_q) +#define niq_delist(_q, _ml) mq_delist(&(_q)->ni_q, (_ml)) +#define niq_filter(_q, _f, _c) mq_filter(&(_q)->ni_q, (_f), (_c)) +#define niq_len(_q) mq_len(&(_q)->ni_q) +#define niq_drops(_q) mq_drops(&(_q)->ni_q) + extern struct ifnet_head ifnet; extern struct ifnet *lo0ifp; +extern struct taskq *softnettq; +void ether_input_list(struct ifnet *, struct mbuf_list *); +void ether_input_mbuf(struct ifnet *, struct mbuf *); void if_start(struct ifnet *); void if_input(struct ifnet *, struct mbuf_list *); -#define ether_input_mbuf(ifp, m) ether_input((ifp), NULL, (m)) - void ether_ifattach(struct ifnet *); void ether_ifdetach(struct ifnet *); int ether_ioctl(struct ifnet *, struct arpcom *, u_long, caddr_t); @@ -423,9 +445,10 @@ void if_clone_detach(struct if_clone *); int if_clone_create(const char *); int if_clone_destroy(const char *); -void if_congestion(struct ifqueue *); int sysctl_ifq(int *, u_int, void *, size_t *, void *, 
size_t, struct ifqueue *); +int sysctl_niq(int *, u_int, void *, size_t *, void *, size_t, + struct niqueue *); int loioctl(struct ifnet *, u_long, caddr_t); void loopattach(int); Index: net/pf.c =================================================================== RCS file: /cvs/src/sys/net/pf.c,v retrieving revision 1.906 diff -u -p -r1.906 pf.c --- net/pf.c 14 Feb 2015 23:32:41 -0000 1.906 +++ net/pf.c 20 Feb 2015 05:24:41 -0000 @@ -222,7 +222,6 @@ int pf_compare_state_keys(struct pf_s struct pf_state *pf_find_state(struct pfi_kif *, struct pf_state_key_cmp *, u_int, struct mbuf *); int pf_src_connlimit(struct pf_state **); -int pf_check_congestion(struct ifqueue *); int pf_match_rcvif(struct mbuf *, struct pf_rule *); void pf_step_into_anchor(int *, struct pf_ruleset **, struct pf_rule **, struct pf_rule **); @@ -3077,7 +3076,6 @@ pf_test_rule(struct pf_pdesc *pd, struct struct tcphdr *th = pd->hdr.tcp; struct pf_state_key *skw = NULL, *sks = NULL; struct pf_rule_actions act; - struct ifqueue *ifq = &ipintrq; u_short reason; int rewrite = 0; int tag = -1; @@ -3092,12 +3090,7 @@ pf_test_rule(struct pf_pdesc *pd, struct act.rtableid = pd->rdomain; SLIST_INIT(&rules); -#ifdef INET6 - if (pd->af == AF_INET6) - ifq = &ip6intrq; -#endif - - if (pd->dir == PF_IN && pf_check_congestion(ifq)) { + if (pd->dir == PF_IN && if_congested()) { REASON_SET(&reason, PFRES_CONGEST); return (PF_DROP); } @@ -6638,15 +6631,6 @@ done: } return (action); -} - -int -pf_check_congestion(struct ifqueue *ifq) -{ - if (ifq->ifq_congestion) - return (1); - else - return (0); } void Index: net/pipex.c =================================================================== RCS file: /cvs/src/sys/net/pipex.c,v retrieving revision 1.65 diff -u -p -r1.65 pipex.c --- net/pipex.c 19 Dec 2014 17:14:40 -0000 1.65 +++ net/pipex.c 20 Feb 2015 05:24:41 -0000 @@ -1161,7 +1161,7 @@ pipex_ip_input(struct mbuf *m0, struct p { struct ifnet *ifp; struct ip *ip; - int s, len; + int len; int is_idle; /* change 
recvif */ @@ -1223,29 +1223,21 @@ pipex_ip_input(struct mbuf *m0, struct p bpf_mtap_af(ifp->if_bpf, AF_INET, m0, BPF_DIRECTION_IN); #endif - s = splnet(); - if (IF_QFULL(&ipintrq)) { - IF_DROP(&ipintrq); + if (niq_enqueue(&ipintrq, m0) != 0) { ifp->if_collisions++; - if (!ipintrq.ifq_congestion) - if_congestion(&ipintrq); - splx(s); - goto drop; + goto dropped; } - IF_ENQUEUE(&ipintrq, m0); - schednetisr(NETISR_IP); ifp->if_ipackets++; ifp->if_ibytes += len; session->stat.ipackets++; session->stat.ibytes += len; - splx(s); - return; drop: if (m0 != NULL) m_freem(m0); +dropped: session->stat.ierrors++; } @@ -1255,7 +1247,7 @@ pipex_ip6_input(struct mbuf *m0, struct { struct ifnet *ifp; struct ip6_hdr *ip6; - int s, len; + int len; /* change recvif */ m0->m_pkthdr.rcvif = session->pipex_iface->ifnet_this; @@ -1298,29 +1290,18 @@ pipex_ip6_input(struct mbuf *m0, struct bpf_mtap_af(ifp->if_bpf, AF_INET6, m0, BPF_DIRECTION_IN); #endif - s = splnet(); - if (IF_QFULL(&ip6intrq)) { - IF_DROP(&ip6intrq); + if (niq_enqueue(&ip6intrq, m0) != 0) { ifp->if_collisions++; - if (!ip6intrq.ifq_congestion) - if_congestion(&ip6intrq); - splx(s); - goto drop; + goto dropped; } - IF_ENQUEUE(&ip6intrq, m0); - schednetisr(NETISR_IPV6); ifp->if_ipackets++; ifp->if_ibytes += len; session->stat.ipackets++; session->stat.ibytes += len; - splx(s); - return; -drop: - if (m0 != NULL) - m_freem(m0); +dropped: session->stat.ierrors++; } #endif Index: net/ppp_tty.c =================================================================== RCS file: /cvs/src/sys/net/ppp_tty.c,v retrieving revision 1.31 diff -u -p -r1.31 ppp_tty.c --- net/ppp_tty.c 10 Feb 2015 21:56:10 -0000 1.31 +++ net/ppp_tty.c 20 Feb 2015 05:24:41 -0000 @@ -303,7 +303,9 @@ pppread(struct tty *tp, struct uio *uio, splx(s); return 0; } - if (!IF_IS_EMPTY(&sc->sc_inq)) + /* Get the packet from the input queue */ + m0 = mq_dequeue(&sc->sc_inq); + if (m0 != NULL) break; if ((tp->t_state & TS_CARR_ON) == 0 && (tp->t_cflag & CLOCAL) == 0 && 
(tp->t_state & TS_ISOPEN)) { @@ -323,9 +325,6 @@ pppread(struct tty *tp, struct uio *uio, /* Pull place-holder byte out of canonical queue */ getc(&tp->t_canq); - - /* Get the packet from the input queue */ - IF_DEQUEUE(&sc->sc_inq, m0); splx(s); for (m = m0; m && uio->uio_resid; m = m->m_next) Index: netinet/if_ether.c =================================================================== RCS file: /cvs/src/sys/netinet/if_ether.c,v retrieving revision 1.146 diff -u -p -r1.146 if_ether.c --- netinet/if_ether.c 11 Feb 2015 23:34:43 -0000 1.146 +++ netinet/if_ether.c 20 Feb 2015 05:24:41 -0000 @@ -57,6 +57,7 @@ #include #include #include +#include #include #include @@ -89,7 +90,8 @@ void in_arpinput(struct mbuf *); LIST_HEAD(, llinfo_arp) llinfo_arp; struct pool arp_pool; /* pool for llinfo_arp structures */ -struct ifqueue arpintrq; +/* XXX hate magic numbers */ +struct niqueue arpintrq = NIQUEUE_INITIALIZER(50, NETISR_ARP); int arp_inuse, arp_allocated; int arp_maxtries = 5; int arpinit_done; @@ -151,7 +153,6 @@ arp_rtrequest(int req, struct rtentry *r arpinit_done = 1; pool_init(&arp_pool, sizeof(struct llinfo_arp), 0, 0, 0, "arp", NULL); - IFQ_SET_MAXLEN(&arpintrq, 50); /* XXX hate magic numbers */ /* * We generate expiration times from time.tv_sec * so avoid accidently creating permanent routes. 
@@ -499,14 +500,9 @@ arpintr(void) { struct mbuf *m; struct arphdr *ar; - int s, len; + int len; - for (;;) { - s = splnet(); - IF_DEQUEUE(&arpintrq, m); - splx(s); - if (m == NULL) - break; + while ((m = niq_dequeue(&arpintrq)) != NULL) { #ifdef DIAGNOSTIC if ((m->m_flags & M_PKTHDR) == 0) panic("arpintr"); Index: netinet/if_ether.h =================================================================== RCS file: /cvs/src/sys/netinet/if_ether.h,v retrieving revision 1.54 diff -u -p -r1.54 if_ether.h --- netinet/if_ether.h 5 Dec 2014 15:50:04 -0000 1.54 +++ netinet/if_ether.h 20 Feb 2015 05:24:41 -0000 @@ -199,7 +199,7 @@ struct sockaddr_inarp { extern u_int8_t etherbroadcastaddr[ETHER_ADDR_LEN]; extern u_int8_t ether_ipmulticast_min[ETHER_ADDR_LEN]; extern u_int8_t ether_ipmulticast_max[ETHER_ADDR_LEN]; -extern struct ifqueue arpintrq; +extern struct niqueue arpintrq; void arpwhohas(struct arpcom *, struct in_addr *); void arpintr(void); Index: netinet/in.h =================================================================== RCS file: /cvs/src/sys/netinet/in.h,v retrieving revision 1.112 diff -u -p -r1.112 in.h --- netinet/in.h 9 Feb 2015 12:18:19 -0000 1.112 +++ netinet/in.h 20 Feb 2015 05:24:41 -0000 @@ -785,7 +785,7 @@ __END_DECLS #ifdef _KERNEL extern int inetctlerrmap[]; -extern struct ifqueue ipintrq; /* ip packet input queue */ +extern struct niqueue ipintrq; /* ip packet input queue */ extern struct in_addr zeroin_addr; struct mbuf; Index: netinet/ip_divert.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_divert.c,v retrieving revision 1.32 diff -u -p -r1.32 ip_divert.c --- netinet/ip_divert.c 24 Jan 2015 00:29:06 -0000 1.32 +++ netinet/ip_divert.c 20 Feb 2015 05:24:41 -0000 @@ -82,11 +82,10 @@ int divert_output(struct inpcb *inp, struct mbuf *m, struct mbuf *nam, struct mbuf *control) { - struct ifqueue *inq; struct sockaddr_in *sin; struct socket *so; struct ifaddr *ifa; - int s, error = 0, min_hdrlen = 
0, dir; + int error = 0, min_hdrlen = 0, dir; struct ip *ip; u_int16_t off; @@ -149,8 +148,6 @@ divert_output(struct inpcb *inp, struct } m->m_pkthdr.rcvif = ifa->ifa_ifp; - inq = &ipintrq; - /* * Recalculate IP and protocol checksums for the inbound packet * since the userspace application may have modified the packet @@ -160,10 +157,7 @@ divert_output(struct inpcb *inp, struct ip->ip_sum = in_cksum(m, off); in_proto_cksum_out(m, NULL); - s = splnet(); - IF_INPUT_ENQUEUE(inq, m); - schednetisr(NETISR_IP); - splx(s); + niq_enqueue(&ipintrq, m); } else { error = ip_output(m, NULL, &inp->inp_route, IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL, 0); Index: netinet/ip_ether.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_ether.c,v retrieving revision 1.70 diff -u -p -r1.70 ip_ether.c --- netinet/ip_ether.c 19 Dec 2014 17:14:40 -0000 1.70 +++ netinet/ip_ether.c 20 Feb 2015 05:24:41 -0000 @@ -280,8 +280,6 @@ void mplsip_decap(struct mbuf *m, int iphlen) { struct gif_softc *sc; - struct ifqueue *ifq; - int s; etheripstat.etherip_ipackets++; @@ -330,22 +328,12 @@ mplsip_decap(struct mbuf *m, int iphlen) pf_pkt_addr_changed(m); #endif - ifq = &mplsintrq; - s = splnet(); - if (IF_QFULL(ifq)) { - IF_DROP(ifq); - m_freem(m); + if (niq_enqueue(&mplsintrq, m) != 0) { etheripstat.etherip_qfull++; - splx(s); DPRINTF(("mplsip_input(): packet dropped because of full " "queue\n")); - return; } - IF_ENQUEUE(ifq, m); - schednetisr(NETISR_MPLS); - splx(s); - return; } #endif Index: netinet/ip_gre.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_gre.c,v retrieving revision 1.52 diff -u -p -r1.52 ip_gre.c --- netinet/ip_gre.c 19 Dec 2014 17:14:40 -0000 1.52 +++ netinet/ip_gre.c 20 Feb 2015 05:24:41 -0000 @@ -93,8 +93,7 @@ int gre_input2(struct mbuf *m, int hlen, u_char proto) { struct greip *gip; - int s; - struct ifqueue *ifq; + struct niqueue *ifq; struct gre_softc *sc; u_short 
flags; u_int af; @@ -160,7 +159,6 @@ gre_input2(struct mbuf *m, int hlen, u_c #ifdef INET6 case ETHERTYPE_IPV6: ifq = &ip6intrq; - schednetisr(NETISR_IPV6); af = AF_INET6; break; #endif @@ -173,7 +171,6 @@ gre_input2(struct mbuf *m, int hlen, u_c case ETHERTYPE_MPLS: case ETHERTYPE_MPLS_MCAST: ifq = &mplsintrq; - schednetisr(NETISR_MPLS); af = AF_MPLS; break; #endif @@ -201,9 +198,7 @@ gre_input2(struct mbuf *m, int hlen, u_c pf_pkt_addr_changed(m); #endif - s = splnet(); /* possible */ - IF_INPUT_ENQUEUE(ifq, m); - splx(s); + niq_enqueue(ifq, m); return (1); /* packet is done, no further processing needed */ } @@ -263,9 +258,8 @@ gre_mobile_input(struct mbuf *m, ...) { struct ip *ip; struct mobip_h *mip; - struct ifqueue *ifq; struct gre_softc *sc; - int hlen, s; + int hlen; va_list ap; u_char osrc = 0; int msiz; @@ -331,16 +325,12 @@ gre_mobile_input(struct mbuf *m, ...) ip->ip_sum = 0; ip->ip_sum = in_cksum(m,(ip->ip_hl << 2)); - ifq = &ipintrq; - #if NBPFILTER > 0 if (sc->sc_if.if_bpf) bpf_mtap_af(sc->sc_if.if_bpf, AF_INET, m, BPF_DIRECTION_IN); #endif - s = splnet(); /* possible */ - IF_INPUT_ENQUEUE(ifq, m); - splx(s); + niq_enqueue(&ipintrq, m); } /* Index: netinet/ip_input.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_input.c,v retrieving revision 1.246 diff -u -p -r1.246 ip_input.c --- netinet/ip_input.c 9 Feb 2015 12:18:19 -0000 1.246 +++ netinet/ip_input.c 20 Feb 2015 05:24:41 -0000 @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -114,7 +115,7 @@ int ip_frags = 0; int *ipctl_vars[IPCTL_MAXID] = IPCTL_VARS; -struct ifqueue ipintrq; +struct niqueue ipintrq = NIQUEUE_INITIALIZER(IFQ_MAXLEN, NETISR_IP); struct pool ipqent_pool; struct pool ipq_pool; @@ -170,7 +171,6 @@ ip_init(void) pr->pr_protocol < IPPROTO_MAX) ip_protox[pr->pr_protocol] = pr - inetsw; LIST_INIT(&ipq); - IFQ_SET_MAXLEN(&ipintrq, IFQ_MAXLEN); if (ip_mtudisc != 0) ip_mtudisc_timeout_q = 
rt_timer_queue_create(ip_mtudisc_timeout); @@ -193,18 +193,12 @@ void ipintr(void) { struct mbuf *m; - int s; - for (;;) { - /* - * Get next datagram off input queue and get IP header - * in first mbuf. - */ - s = splnet(); - IF_DEQUEUE(&ipintrq, m); - splx(s); - if (m == NULL) - return; + /* + * Get next datagram off input queue and get IP header + * in first mbuf. + */ + while ((m = niq_dequeue(&ipintrq)) != NULL) { #ifdef DIAGNOSTIC if ((m->m_flags & M_PKTHDR) == 0) panic("ipintr no HDR"); @@ -1617,7 +1611,7 @@ ip_sysctl(int *name, u_int namelen, void ipsec_def_comp, sizeof(ipsec_def_comp))); case IPCTL_IFQUEUE: - return (sysctl_ifq(name + 1, namelen - 1, + return (sysctl_niq(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, &ipintrq)); case IPCTL_STATS: return (sysctl_rdstruct(oldp, oldlenp, newp, Index: netinet/ip_ipip.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_ipip.c,v retrieving revision 1.56 diff -u -p -r1.56 ip_ipip.c --- netinet/ip_ipip.c 19 Dec 2014 17:14:40 -0000 1.56 +++ netinet/ip_ipip.c 20 Feb 2015 05:24:41 -0000 @@ -146,15 +146,14 @@ ipip_input(struct mbuf *m, int iphlen, s struct sockaddr_in *sin; struct ifnet *ifp; struct ifaddr *ifa; - struct ifqueue *ifq = NULL; + struct niqueue *ifq = NULL; struct ip *ipo; u_int rdomain; #ifdef INET6 struct sockaddr_in6 *sin6; struct ip6_hdr *ip6; #endif - int isr; - int mode, hlen, s; + int mode, hlen; u_int8_t itos, otos; u_int8_t v; sa_family_t af; @@ -352,13 +351,11 @@ ipip_input(struct mbuf *m, int iphlen, s switch (proto) { case IPPROTO_IPV4: ifq = &ipintrq; - isr = NETISR_IP; af = AF_INET; break; #ifdef INET6 case IPPROTO_IPV6: ifq = &ip6intrq; - isr = NETISR_IPV6; af = AF_INET6; break; #endif @@ -374,23 +371,12 @@ ipip_input(struct mbuf *m, int iphlen, s pf_pkt_addr_changed(m); #endif - s = splnet(); /* isn't it already? 
*/ - if (IF_QFULL(ifq)) { - IF_DROP(ifq); - m_freem(m); + if (niq_enqueue(ifq, m) != 0) { ipipstat.ipips_qfull++; - - splx(s); - DPRINTF(("ipip_input(): packet dropped because of full " "queue\n")); return; } - - IF_ENQUEUE(ifq, m); - schednetisr(isr); - splx(s); - return; } int Index: netinet/ipsec_input.c =================================================================== RCS file: /cvs/src/sys/netinet/ipsec_input.c,v retrieving revision 1.126 diff -u -p -r1.126 ipsec_input.c --- netinet/ipsec_input.c 24 Jan 2015 00:29:06 -0000 1.126 +++ netinet/ipsec_input.c 20 Feb 2015 05:24:41 -0000 @@ -830,28 +830,18 @@ ah4_input(struct mbuf *m, ...) int ah4_input_cb(struct mbuf *m, ...) { - struct ifqueue *ifq = &ipintrq; - int s = splnet(); - /* * Interface pointer is already in first mbuf; chop off the * `outer' header and reschedule. */ - if (IF_QFULL(ifq)) { - IF_DROP(ifq); + if (niq_enqueue(&ipintrq, m) != 0) { ahstat.ahs_qfull++; - splx(s); - - m_freem(m); DPRINTF(("ah4_input_cb(): dropped packet because of full " "IP queue\n")); return ENOBUFS; } - IF_ENQUEUE(ifq, m); - schednetisr(NETISR_IP); - splx(s); return 0; } @@ -886,27 +876,17 @@ esp4_input(struct mbuf *m, ...) int esp4_input_cb(struct mbuf *m, ...) { - struct ifqueue *ifq = &ipintrq; - int s = splnet(); - /* * Interface pointer is already in first mbuf; chop off the * `outer' header and reschedule. */ - if (IF_QFULL(ifq)) { - IF_DROP(ifq); + if (niq_enqueue(&ipintrq, m) != 0) { espstat.esps_qfull++; - splx(s); - - m_freem(m); DPRINTF(("esp4_input_cb(): dropped packet because of full " "IP queue\n")); return ENOBUFS; } - IF_ENQUEUE(ifq, m); - schednetisr(NETISR_IP); - splx(s); return 0; } @@ -928,26 +908,15 @@ ipcomp4_input(struct mbuf *m, ...) int ipcomp4_input_cb(struct mbuf *m, ...) { - struct ifqueue *ifq = &ipintrq; - int s = splnet(); - /* * Interface pointer is already in first mbuf; chop off the * `outer' header and reschedule. 
*/ - if (IF_QFULL(ifq)) { - IF_DROP(ifq); + if (niq_enqueue(&ipintrq, m) != 0) { ipcompstat.ipcomps_qfull++; - splx(s); - - m_freem(m); DPRINTF(("ipcomp4_input_cb(): dropped packet because of full IP queue\n")); return ENOBUFS; } - - IF_ENQUEUE(ifq, m); - schednetisr(NETISR_IP); - splx(s); return 0; } Index: netinet6/in6.h =================================================================== RCS file: /cvs/src/sys/netinet6/in6.h,v retrieving revision 1.80 diff -u -p -r1.80 in6.h --- netinet6/in6.h 9 Feb 2015 12:23:22 -0000 1.80 +++ netinet6/in6.h 20 Feb 2015 05:24:41 -0000 @@ -418,7 +418,7 @@ typedef __socklen_t socklen_t; /* length #ifdef _KERNEL extern u_char inet6ctlerrmap[]; -extern struct ifqueue ip6intrq; /* IP6 packet input queue */ +extern struct niqueue ip6intrq; /* IP6 packet input queue */ extern struct in6_addr zeroin6_addr; struct mbuf; Index: netinet6/ip6_divert.c =================================================================== RCS file: /cvs/src/sys/netinet6/ip6_divert.c,v retrieving revision 1.32 diff -u -p -r1.32 ip6_divert.c --- netinet6/ip6_divert.c 24 Jan 2015 00:29:06 -0000 1.32 +++ netinet6/ip6_divert.c 20 Feb 2015 05:24:41 -0000 @@ -86,11 +86,10 @@ int divert6_output(struct inpcb *inp, struct mbuf *m, struct mbuf *nam, struct mbuf *control) { - struct ifqueue *inq; struct sockaddr_in6 *sin6; struct socket *so; struct ifaddr *ifa; - int s, error = 0, min_hdrlen = 0, nxt = 0, off, dir; + int error = 0, min_hdrlen = 0, nxt = 0, off, dir; struct ip6_hdr *ip6; m->m_pkthdr.rcvif = NULL; @@ -159,8 +158,6 @@ divert6_output(struct inpcb *inp, struct } m->m_pkthdr.rcvif = ifa->ifa_ifp; - inq = &ip6intrq; - /* * Recalculate the protocol checksum for the inbound packet * since the userspace application may have modified the packet @@ -168,10 +165,7 @@ divert6_output(struct inpcb *inp, struct */ in6_proto_cksum_out(m, NULL); - s = splnet(); - IF_INPUT_ENQUEUE(inq, m); - schednetisr(NETISR_IPV6); - splx(s); + niq_enqueue(&ip6intrq, m); /* return error on 
q full? */ } else { error = ip6_output(m, NULL, &inp->inp_route6, IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL, NULL); Index: netinet6/ip6_input.c =================================================================== RCS file: /cvs/src/sys/netinet6/ip6_input.c,v retrieving revision 1.139 diff -u -p -r1.139 ip6_input.c --- netinet6/ip6_input.c 9 Feb 2015 12:23:22 -0000 1.139 +++ netinet6/ip6_input.c 20 Feb 2015 05:24:41 -0000 @@ -114,7 +114,7 @@ #endif struct in6_ifaddrhead in6_ifaddr; -struct ifqueue ip6intrq; +struct niqueue ip6intrq = NIQUEUE_INITIALIZER(IFQ_MAXLEN, NETISR_IPV6); struct ip6stat ip6stat; @@ -145,7 +145,6 @@ ip6_init(void) pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW && pr->pr_protocol < IPPROTO_MAX) ip6_protox[pr->pr_protocol] = pr - inet6sw; - IFQ_SET_MAXLEN(&ip6intrq, IFQ_MAXLEN); TAILQ_INIT(&in6_ifaddr); ip6_randomid_init(); nd6_init(); @@ -169,17 +168,10 @@ ip6_init2(void *dummy) void ip6intr(void) { - int s; struct mbuf *m; - for (;;) { - s = splnet(); - IF_DEQUEUE(&ip6intrq, m); - splx(s); - if (m == NULL) - return; + while ((m = niq_dequeue(&ip6intrq)) != NULL) ip6_input(m); - } } extern struct route_in6 ip6_forward_rt; @@ -1453,7 +1445,7 @@ ip6_sysctl(int *name, u_int namelen, voi } return (error); case IPV6CTL_IFQUEUE: - return (sysctl_ifq(name + 1, namelen - 1, + return (sysctl_niq(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, &ip6intrq)); default: if (name[0] < IPV6CTL_MAXID) Index: netmpls/mpls.h =================================================================== RCS file: /cvs/src/sys/netmpls/mpls.h,v retrieving revision 1.29 diff -u -p -r1.29 mpls.h --- netmpls/mpls.h 15 Jan 2015 23:50:31 -0000 1.29 +++ netmpls/mpls.h 20 Feb 2015 05:24:41 -0000 @@ -159,7 +159,7 @@ void mpe_input6(struct mbuf *, struct if extern int mpls_raw_usrreq(struct socket *, int, struct mbuf *, struct mbuf *, struct mbuf *, struct proc *); -extern struct ifqueue mplsintrq; /* MPLS input queue */ +extern struct niqueue mplsintrq; /* MPLS input queue */ 
extern int mpls_defttl; extern int mpls_mapttl_ip; extern int mpls_mapttl_ip6; Index: netmpls/mpls_input.c =================================================================== RCS file: /cvs/src/sys/netmpls/mpls_input.c,v retrieving revision 1.42 diff -u -p -r1.42 mpls_input.c --- netmpls/mpls_input.c 23 Dec 2014 03:24:08 -0000 1.42 +++ netmpls/mpls_input.c 20 Feb 2015 05:24:41 -0000 @@ -40,7 +40,7 @@ #include -struct ifqueue mplsintrq; +struct niqueue mplsintrq = NIQUEUE_INITIALIZER(IFQ_MAXLEN, NETISR_MPLS); #ifdef MPLS_DEBUG #define MPLS_LABEL_GET(l) ((ntohl((l) & MPLS_LABEL_MASK)) >> MPLS_LABEL_OFFSET) @@ -57,22 +57,15 @@ struct mbuf *mpls_do_error(struct mbuf * void mpls_init(void) { - IFQ_SET_MAXLEN(&mplsintrq, IFQ_MAXLEN); } void mplsintr(void) { struct mbuf *m; - int s; - for (;;) { - /* Get next datagram of input queue */ - s = splnet(); - IF_DEQUEUE(&mplsintrq, m); - splx(s); - if (m == NULL) - return; + /* Get next datagram of input queue */ + while ((m = niq_dequeue(&mplsintrq)) != NULL) { #ifdef DIAGNOSTIC if ((m->m_flags & M_PKTHDR) == 0) panic("mplsintr no HDR"); @@ -91,7 +84,7 @@ mpls_input(struct mbuf *m) struct rtentry *rt = NULL; struct rt_mpls *rt_mpls; u_int8_t ttl; - int i, s, hasbos; + int i, hasbos; if (!ISSET(ifp->if_xflags, IFXF_MPLS)) { m_freem(m); @@ -158,10 +151,7 @@ mpls_input(struct mbuf *m) do_v4: if (mpls_ip_adjttl(m, ttl)) goto done; - s = splnet(); - IF_INPUT_ENQUEUE(&ipintrq, m); - schednetisr(NETISR_IP); - splx(s); + niq_enqueue(&ipintrq, m); goto done; } continue; @@ -171,10 +161,7 @@ do_v4: do_v6: if (mpls_ip6_adjttl(m, ttl)) goto done; - s = splnet(); - IF_INPUT_ENQUEUE(&ip6intrq, m); - schednetisr(NETISR_IPV6); - splx(s); + niq_enqueue(&ip6intrq, m); goto done; } continue; @@ -241,19 +228,13 @@ do_v6: case AF_INET: if (mpls_ip_adjttl(m, ttl)) break; - s = splnet(); - IF_INPUT_ENQUEUE(&ipintrq, m); - schednetisr(NETISR_IP); - splx(s); + niq_enqueue(&ipintrq, m); break; #ifdef INET6 case AF_INET6: if (mpls_ip6_adjttl(m, ttl)) 
break; - s = splnet(); - IF_INPUT_ENQUEUE(&ip6intrq, m); - schednetisr(NETISR_IPV6); - splx(s); + niq_enqueue(&ip6intrq, m); break; #endif default: Index: netmpls/mpls_raw.c =================================================================== RCS file: /cvs/src/sys/netmpls/mpls_raw.c,v retrieving revision 1.10 diff -u -p -r1.10 mpls_raw.c --- netmpls/mpls_raw.c 5 Dec 2014 15:50:04 -0000 1.10 +++ netmpls/mpls_raw.c 20 Feb 2015 05:24:41 -0000 @@ -135,7 +135,7 @@ mpls_sysctl(int *name, u_int namelen, vo switch (name[0]) { case MPLSCTL_IFQUEUE: - return (sysctl_ifq(name + 1, namelen - 1, + return (sysctl_niq(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, &mplsintrq)); default: return sysctl_int_arr(mplsctl_vars, name, namelen, Index: sys/pool.h =================================================================== RCS file: /cvs/src/sys/sys/pool.h,v retrieving revision 1.57 diff -u -p -r1.57 pool.h --- sys/pool.h 10 Feb 2015 06:16:13 -0000 1.57 +++ sys/pool.h 20 Feb 2015 05:24:41 -0000 @@ -39,10 +39,24 @@ * kern.pool.npools * kern.pool.name. * kern.pool.pool. + * kern.pool.wait_free + * kern.pool.wait_gc */ #define KERN_POOL_NPOOLS 1 #define KERN_POOL_NAME 2 #define KERN_POOL_POOL 3 +#define KERN_POOL_WAITFREE 4 +#define KERN_POOL_WAITGC 5 +#define KERN_POOL_MAXID 6 + +#define CTL_KERN_POOL_NAMES { \ + { 0, 0 }, \ + { "npools", CTLTYPE_INT }, \ + { "name", CTLTYPE_NODE }, \ + { "pool", CTLTYPE_NODE }, \ + { "wait_free", CTLTYPE_INT }, \ + { "wait_gc", CTLTYPE_INT } \ +} struct kinfo_pool { unsigned int pr_size; /* size of a pool item */ Index: sys/syscall.h =================================================================== RCS file: /cvs/src/sys/sys/syscall.h,v retrieving revision 1.163 diff -u -p -r1.163 syscall.h --- sys/syscall.h 9 Feb 2015 13:55:04 -0000 1.163 +++ sys/syscall.h 20 Feb 2015 05:24:41 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: syscall.h,v 1.163 2015/02/09 13:55:04 deraadt Exp $ */ +/* $OpenBSD$ */ /* * System call numbers. 
Index: sys/syscallargs.h =================================================================== RCS file: /cvs/src/sys/sys/syscallargs.h,v retrieving revision 1.165 diff -u -p -r1.165 syscallargs.h --- sys/syscallargs.h 9 Feb 2015 13:55:04 -0000 1.165 +++ sys/syscallargs.h 20 Feb 2015 05:24:41 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: syscallargs.h,v 1.165 2015/02/09 13:55:04 deraadt Exp $ */ +/* $OpenBSD$ */ /* * System call argument lists. Index: sys/sysctl.h =================================================================== RCS file: /cvs/src/sys/sys/sysctl.h,v retrieving revision 1.155 diff -u -p -r1.155 sysctl.h --- sys/sysctl.h 11 Feb 2015 05:09:33 -0000 1.155 +++ sys/sysctl.h 20 Feb 2015 05:24:41 -0000 @@ -952,7 +952,7 @@ int sysctl_vnode(char *, size_t *, struc #ifdef GPROF int sysctl_doprof(int *, u_int, void *, size_t *, void *, size_t); #endif -int sysctl_dopool(int *, u_int, char *, size_t *); +int sysctl_pool(int *, u_int, char *, size_t *, void *, size_t); int kern_sysctl(int *, u_int, void *, size_t *, void *, size_t, struct proc *); Index: uvm/uvm_addr.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_addr.c,v retrieving revision 1.11 diff -u -p -r1.11 uvm_addr.c --- uvm/uvm_addr.c 23 Dec 2014 02:01:57 -0000 1.11 +++ uvm/uvm_addr.c 20 Feb 2015 05:24:41 -0000 @@ -281,14 +281,19 @@ uvm_addr_init(void) { pool_init(&uaddr_pool, sizeof(struct uvm_addr_state), 0, 0, PR_WAITOK, "uaddr", NULL); + pool_setipl(&uaddr_pool, IPL_VM); pool_init(&uaddr_hint_pool, sizeof(struct uaddr_hint_state), 0, 0, PR_WAITOK, "uaddrhint", NULL); + pool_setipl(&uaddr_hint_pool, IPL_VM); pool_init(&uaddr_bestfit_pool, sizeof(struct uaddr_bestfit_state), 0, 0, PR_WAITOK, "uaddrbest", NULL); + pool_setipl(&uaddr_bestfit_pool, IPL_VM); pool_init(&uaddr_pivot_pool, sizeof(struct uaddr_pivot_state), 0, 0, PR_WAITOK, "uaddrpivot", NULL); + pool_setipl(&uaddr_pivot_pool, IPL_VM); pool_init(&uaddr_rnd_pool, sizeof(struct uaddr_rnd_state), 0, 0, 
PR_WAITOK, "uaddrrnd", NULL); + pool_setipl(&uaddr_rnd_pool, IPL_VM); uaddr_kbootstrap.uaddr_minaddr = PAGE_SIZE; uaddr_kbootstrap.uaddr_maxaddr = -(vaddr_t)PAGE_SIZE; Index: uvm/uvm_amap.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_amap.c,v retrieving revision 1.58 diff -u -p -r1.58 uvm_amap.c --- uvm/uvm_amap.c 23 Dec 2014 04:56:47 -0000 1.58 +++ uvm/uvm_amap.c 20 Feb 2015 05:24:41 -0000 @@ -716,6 +716,7 @@ void amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset) { int leftslots; + KERNEL_LOCK(); AMAP_B2SLOT(leftslots, offset); if (leftslots == 0) @@ -734,6 +735,7 @@ amap_splitref(struct vm_aref *origref, s splitref->ar_amap = origref->ar_amap; splitref->ar_amap->am_ref++; /* not a share reference */ splitref->ar_pageoff = origref->ar_pageoff + leftslots; + KERNEL_UNLOCK(); } #ifdef UVM_AMAP_PPREF Index: uvm/uvm_glue.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_glue.c,v retrieving revision 1.69 diff -u -p -r1.69 uvm_glue.c --- uvm/uvm_glue.c 15 Dec 2014 20:38:22 -0000 1.69 +++ uvm/uvm_glue.c 20 Feb 2015 05:24:41 -0000 @@ -464,11 +464,15 @@ uvm_atopg(vaddr_t kva) return (pg); } +#ifndef MULTIPROCESSOR +#define _kernel_lock_held() 1 +#endif + void uvm_pause(void) { static unsigned int toggle; - if (toggle++ > 128) { + if (toggle++ > 128 && _kernel_lock_held()) { toggle = 0; KERNEL_UNLOCK(); KERNEL_LOCK(); Index: uvm/uvm_map.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_map.c,v retrieving revision 1.186 diff -u -p -r1.186 uvm_map.c --- uvm/uvm_map.c 15 Feb 2015 21:34:33 -0000 1.186 +++ uvm/uvm_map.c 20 Feb 2015 05:24:41 -0000 @@ -1844,8 +1844,10 @@ uvm_unmap_kill_entry(struct vm_map *map, { /* Unwire removed map entry. 
*/ if (VM_MAPENT_ISWIRED(entry)) { + KERNEL_LOCK(); entry->wired_count = 0; uvm_fault_unwire_locked(map, entry->start, entry->end); + KERNEL_UNLOCK(); } /* Entry-type specific code. */ @@ -2424,18 +2426,20 @@ void uvm_map_teardown(struct vm_map *map) { struct uvm_map_deadq dead_entries; - int i, waitok = 0; struct vm_map_entry *entry, *tmp; #ifdef VMMAP_DEBUG size_t numq, numt; #endif + int i; - if ((map->flags & VM_MAP_INTRSAFE) == 0) - waitok = 1; - if (waitok) { - if (rw_enter(&map->lock, RW_NOSLEEP | RW_WRITE) != 0) - panic("uvm_map_teardown: rw_enter failed on free map"); - } + KERNEL_ASSERT_LOCKED(); + KERNEL_UNLOCK(); + KERNEL_ASSERT_UNLOCKED(); + + KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); + + if (rw_enter(&map->lock, RW_NOSLEEP | RW_WRITE) != 0) + panic("uvm_map_teardown: rw_enter failed on free map"); /* Remove address selectors. */ uvm_addr_destroy(map->uaddr_exe); @@ -2468,8 +2472,7 @@ uvm_map_teardown(struct vm_map *map) if ((entry = RB_ROOT(&map->addr)) != NULL) DEAD_ENTRY_PUSH(&dead_entries, entry); while (entry != NULL) { - if (waitok) - uvm_pause(); + uvm_pause(); uvm_unmap_kill_entry(map, entry); if ((tmp = RB_LEFT(entry, daddrs.addr_entry)) != NULL) DEAD_ENTRY_PUSH(&dead_entries, tmp); @@ -2479,8 +2482,7 @@ uvm_map_teardown(struct vm_map *map) entry = TAILQ_NEXT(entry, dfree.deadq); } - if (waitok) - rw_exit(&map->lock); + rw_exit(&map->lock); #ifdef VMMAP_DEBUG numt = numq = 0; @@ -2490,7 +2492,10 @@ uvm_map_teardown(struct vm_map *map) numq++; KASSERT(numt == numq); #endif - uvm_unmap_detach(&dead_entries, waitok ? UVM_PLA_WAITOK : 0); + uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK); + + KERNEL_LOCK(); + pmap_destroy(map->pmap); map->pmap = NULL; } @@ -3187,6 +3192,8 @@ void uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, boolean_t pageable, boolean_t remove_holes) { + KASSERT(pmap == NULL || pmap == pmap_kernel()); + if (pmap) pmap_reference(pmap); else