Index: kern/kern_srp.c =================================================================== RCS file: /cvs/src/sys/kern/kern_srp.c,v retrieving revision 1.9 diff -u -p -r1.9 kern_srp.c --- kern/kern_srp.c 18 May 2016 03:58:13 -0000 1.9 +++ kern/kern_srp.c 30 May 2016 04:17:56 -0000 @@ -47,14 +47,11 @@ srp_init(struct srp *srp) srp->ref = NULL; } -void -srp_update_locked(struct srp_gc *srp_gc, struct srp *srp, void *nv) +void * +srp_swap_locked(struct srp *srp, void *nv) { void *ov; - if (nv != NULL) - refcnt_take(&srp_gc->srp_gc_refcnt); - /* * this doesn't have to be as careful as the caller has already * prevented concurrent updates, eg. by holding the kernel lock. @@ -63,8 +60,20 @@ srp_update_locked(struct srp_gc *srp_gc, ov = srp->ref; srp->ref = nv; - if (ov != NULL) - srp_v_gc_start(srp_gc, srp, ov); + + return (ov); +} + +void +srp_update_locked(struct srp_gc *srp_gc, struct srp *srp, void *v) +{ + if (v != NULL) + refcnt_take(&srp_gc->srp_gc_refcnt); + + v = srp_swap_locked(srp, v); + + if (v != NULL) + srp_v_gc_start(srp_gc, srp, v); } void * @@ -174,13 +183,19 @@ srp_v_gc(void *x) pool_put(&srp_gc_ctx_pool, ctx); } +void * +srp_swap(struct srp *srp, void *v) +{ + return (atomic_swap_ptr(&srp->ref, v)); +} + void srp_update(struct srp_gc *srp_gc, struct srp *srp, void *v) { if (v != NULL) refcnt_take(&srp_gc->srp_gc_refcnt); - v = atomic_swap_ptr(&srp->ref, v); + v = srp_swap(srp, v); if (v != NULL) srp_v_gc_start(srp_gc, srp, v); } @@ -207,7 +222,7 @@ srp_v(struct srp_hazard *hzrd, struct sr } void * -srp_enter(struct srp_ref *sr, struct srp *srp) +_srp_enter(const char *fn, unsigned int ln, struct srp_ref *sr, struct srp *srp) { struct cpu_info *ci = curcpu(); struct srp_hazard *hzrd; @@ -216,6 +231,8 @@ srp_enter(struct srp_ref *sr, struct srp for (i = 0; i < nitems(ci->ci_srp_hazards); i++) { hzrd = &ci->ci_srp_hazards[i]; if (hzrd->sh_p == NULL) { + hzrd->sh_f = fn; + hzrd->sh_l = ln; sr->hz = hzrd; return (srp_v(hzrd, srp)); } @@ -228,8 +245,10 @@ 
srp_enter(struct srp_ref *sr, struct srp } void * -srp_follow(struct srp_ref *sr, struct srp *srp) +_srp_follow(const char *fn, unsigned int ln, struct srp_ref *sr, struct srp *srp) { + sr->hz->sh_f = fn; + sr->hz->sh_l = ln; return (srp_v(sr->hz, srp)); } @@ -237,6 +256,33 @@ void srp_leave(struct srp_ref *sr) { sr->hz->sh_p = NULL; +} + +static inline int +srp_referenced(void *v) +{ + struct cpu_info *ci; + CPU_INFO_ITERATOR cii; + u_int i; + struct srp_hazard *hzrd; + + CPU_INFO_FOREACH(cii, ci) { + for (i = 0; i < nitems(ci->ci_srp_hazards); i++) { + hzrd = &ci->ci_srp_hazards[i]; + + if (hzrd->sh_p != NULL && hzrd->sh_v == v) + return (1); + } + } + + return (0); +} + +void +srp_finalize(void *v, const char *wmesg) +{ + while (srp_referenced(v)) + tsleep(v, PWAIT, wmesg, 1); } #else /* MULTIPROCESSOR */ Index: net/art.c =================================================================== RCS file: /cvs/src/sys/net/art.c,v retrieving revision 1.14 diff -u -p -r1.14 art.c --- net/art.c 13 Apr 2016 08:04:14 -0000 1.14 +++ net/art.c 30 May 2016 04:17:56 -0000 @@ -32,12 +32,13 @@ #include #include #include +#include #endif #include -#define ISLEAF(e) (((unsigned long)(e).node & 1) == 0) -#define SUBTABLE(e) (((struct art_table *)((unsigned long)(e).child & ~1))) +#define ISLEAF(e) (((unsigned long)(e) & 1) == 0) +#define SUBTABLE(e) ((struct art_table *)((unsigned long)(e) & ~1)) #define ASNODE(t) ((struct art_node *)((unsigned long)(t) | 1)) /* @@ -57,8 +58,7 @@ struct art_table { * is a route counter. 
*/ union { - struct art_node *node; - struct art_table *child; + struct srp node; unsigned long count; } *at_heap; /* Array of 2^(slen+1) items */ }; @@ -78,22 +78,43 @@ struct art_node *art_table_insert(struc int, struct art_node *); struct art_node *art_table_delete(struct art_root *, struct art_table *, int, struct art_node *); -void art_table_ref(struct art_root *, struct art_table *); +struct art_table *art_table_ref(struct art_root *, struct art_table *); int art_table_free(struct art_root *, struct art_table *); int art_table_walk(struct art_root *, struct art_table *, int (*f)(struct art_node *, void *), void *); +int art_walk_apply(struct art_root *, + struct art_node *, struct art_node *, + int (*f)(struct art_node *, void *), void *); +void art_table_gc(void *); +void art_gc(void *); struct pool an_pool, at_pool, at_heap_4_pool, at_heap_8_pool; +struct art_table *art_table_gc_list = NULL; +struct mutex art_table_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); +struct task art_table_gc_task = + TASK_INITIALIZER(art_table_gc, NULL); + +struct art_node *art_node_gc_list = NULL; +struct mutex art_node_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); +struct task art_node_gc_task = TASK_INITIALIZER(art_gc, NULL); + void art_init(void) { pool_init(&an_pool, sizeof(struct art_node), 0, 0, 0, "art_node", NULL); + pool_setipl(&an_pool, IPL_SOFTNET); + pool_init(&at_pool, sizeof(struct art_table), 0, 0, 0, "art_table", NULL); + pool_setipl(&at_pool, IPL_SOFTNET); + pool_init(&at_heap_4_pool, AT_HEAPSIZE(4), 0, 0, 0, "art_heap4", NULL); + pool_setipl(&at_heap_4_pool, IPL_SOFTNET); + pool_init(&at_heap_8_pool, AT_HEAPSIZE(8), 0, 0, 0, "art_heap8", &pool_allocator_single); + pool_setipl(&at_heap_8_pool, IPL_SOFTNET); } /* @@ -131,6 +152,7 @@ art_alloc(unsigned int rtableid, unsigne ar->ar_off = off; ar->ar_rtableid = rtableid; + mtx_init(&ar->ar_lock, IPL_SOFTNET); return (ar); } @@ -230,41 +252,67 @@ art_findex(struct art_table *at, uint8_t * Return the best existing match for a 
destination. */ struct art_node * -art_match(struct art_root *ar, uint8_t *addr) +art_match(struct art_root *ar, uint8_t *addr, struct srp_ref *nsr) { + struct srp_ref noder, dfltr, ndfltr; + void *entry; struct art_table *at; - struct art_node *dflt = NULL; - int j; + struct art_node *dflt, *node; + int j; - at = ar->ar_root; - if (at == NULL) + entry = srp_enter(&noder, &ar->ar_root); + at = entry; + + if (at == NULL) { + srp_leave(&noder); return (NULL); + } + + node = srp_enter(&dfltr, &at->at_default); + dflt = node; /* * Iterate until we find a leaf. */ while (1) { - /* - * Rember the default route of this table in case - * we do not find a better matching route. - */ - if (at->at_default != NULL) - dflt = at->at_default; - /* Do a single level route lookup. */ j = art_findex(at, addr); + entry = srp_follow(&noder, &at->at_heap[j].node); /* If this is a leaf we're done. */ - if (ISLEAF(at->at_heap[j])) + if (ISLEAF(entry)) break; - at = SUBTABLE(at->at_heap[j]); - } - - if (at->at_heap[j].node != NULL) - return (at->at_heap[j].node); + at = SUBTABLE(entry); - return (dflt); + /* + * Remember the default route of this table in case + * we do not find a better matching route. + */ + node = srp_enter(&ndfltr, &at->at_default); + if (node != NULL) { + srp_leave(&dfltr); + dfltr = ndfltr; + dflt = node; + } else + srp_leave(&ndfltr); + } + + if (entry == NULL && dflt == NULL) { + srp_leave(&noder); + srp_leave(&dfltr); + return (NULL); + } else if (entry == NULL) { + srp_leave(&noder); + *nsr = dfltr; + KASSERT(ISLEAF(dflt)); + return (dflt); + } else { + srp_leave(&dfltr); + *nsr = noder; + KASSERT(ISLEAF(entry)); + return (entry); + } } /* @@ -274,21 +322,25 @@ art_match(struct art_root *ar, uint8_t * * it does not exist. 
*/ struct art_node * -art_lookup(struct art_root *ar, uint8_t *addr, int plen) +art_lookup(struct art_root *ar, uint8_t *addr, int plen, struct srp_ref *nsr) { + void *entry; struct art_table *at; - struct art_node *an; int i, j; KASSERT(plen >= 0 && plen <= ar->ar_alen); - at = ar->ar_root; + entry = srp_enter(nsr, &ar->ar_root); + at = entry; + if (at == NULL) - return (NULL); + goto done; /* Default route */ - if (plen == 0) - return (at->at_default); + if (plen == 0) { + entry = srp_follow(nsr, &at->at_default); + goto done; + } /* * If the prefix length is smaller than the sum of @@ -298,24 +350,33 @@ art_lookup(struct art_root *ar, uint8_t while (plen > (at->at_offset + at->at_bits)) { /* Do a single level route lookup. */ j = art_findex(at, addr); + entry = srp_follow(nsr, &at->at_heap[j].node); /* A leaf is a match, but not a perfect one. */ - if (ISLEAF(at->at_heap[j])) + if (ISLEAF(entry)) { + srp_leave(nsr); return (NULL); + } - at = SUBTABLE(at->at_heap[j]); + at = SUBTABLE(entry); } i = art_bindex(at, addr, plen); - if (i == -1) + if (i == -1) { + srp_leave(nsr); return (NULL); + } - if (!ISLEAF(at->at_heap[i])) - an = SUBTABLE(at->at_heap[i])->at_default; - else - an = at->at_heap[i].node; + entry = srp_follow(nsr, &at->at_heap[i].node); + if (!ISLEAF(entry)) + entry = srp_follow(nsr, &SUBTABLE(entry)->at_default); - return (an); +done: + if (entry == NULL) + srp_leave(nsr); + + KASSERT(ISLEAF(entry)); + return (entry); } @@ -328,27 +389,30 @@ art_lookup(struct art_root *ar, uint8_t struct art_node * art_insert(struct art_root *ar, struct art_node *an, uint8_t *addr, int plen) { - struct art_table *at; + struct art_table *at, *child; + struct art_node *node; int i, j; + MUTEX_ASSERT_LOCKED(&ar->ar_lock); KASSERT(plen >= 0 && plen <= ar->ar_alen); - at = ar->ar_root; + at = srp_get_locked(&ar->ar_root); if (at == NULL) { at = art_table_get(ar, NULL, -1); if (at == NULL) return (NULL); - ar->ar_root = at; + srp_swap_locked(&ar->ar_root, at); } /* 
Default route */ if (plen == 0) { - if (at->at_default != NULL) - return (at->at_default); + node = srp_get_locked(&at->at_default); + if (node != NULL) + return (node); art_table_ref(ar, at); - at->at_default = an; + srp_swap_locked(&at->at_default, an); return (an); } @@ -360,6 +424,7 @@ art_insert(struct art_root *ar, struct a while (plen > (at->at_offset + at->at_bits)) { /* Do a single level route lookup. */ j = art_findex(at, addr); + node = srp_get_locked(&at->at_heap[j].node); /* * If the node corresponding to the fringe index is @@ -367,18 +432,16 @@ art_insert(struct art_root *ar, struct a * entry of this node will then become the default * route of the subtable. */ - if (ISLEAF(at->at_heap[j])) { - struct art_table *child; - + if (ISLEAF(node)) { child = art_table_get(ar, at, j); if (child == NULL) return (NULL); art_table_ref(ar, at); - at->at_heap[j].node = ASNODE(child); - } - - at = SUBTABLE(at->at_heap[j]); + srp_swap_locked(&at->at_heap[j].node, ASNODE(child)); + at = child; + } else + at = SUBTABLE(node); } i = art_bindex(at, addr, plen); @@ -395,12 +458,13 @@ struct art_node * art_table_insert(struct art_root *ar, struct art_table *at, int i, struct art_node *an) { - struct art_node *prev; + struct art_node *prev, *node; - if (!ISLEAF(at->at_heap[i])) - prev = SUBTABLE(at->at_heap[i])->at_default; + node = srp_get_locked(&at->at_heap[i].node); + if (!ISLEAF(node)) + prev = srp_get_locked(&SUBTABLE(node)->at_default); else - prev = at->at_heap[i].node; + prev = node; if (art_check_duplicate(ar, prev, an)) return (prev); @@ -414,10 +478,10 @@ art_table_insert(struct art_root *ar, st */ if (i < at->at_minfringe) art_allot(at, i, prev, an); - else if (!ISLEAF(at->at_heap[i])) - SUBTABLE(at->at_heap[i])->at_default = an; + else if (!ISLEAF(node)) + srp_swap_locked(&SUBTABLE(node)->at_default, an); else - at->at_heap[i].node = an; + srp_swap_locked(&at->at_heap[i].node, an); return (an); } @@ -430,21 +494,22 @@ struct art_node * art_delete(struct 
art_root *ar, struct art_node *an, uint8_t *addr, int plen) { struct art_table *at; - struct art_node *dflt; + struct art_node *node; int i, j; + MUTEX_ASSERT_LOCKED(&ar->ar_lock); KASSERT(plen >= 0 && plen <= ar->ar_alen); - at = ar->ar_root; + at = srp_get_locked(&ar->ar_root); if (at == NULL) return (NULL); /* Default route */ if (plen == 0) { - dflt = at->at_default; - at->at_default = NULL; + node = srp_get_locked(&at->at_default); + srp_swap_locked(&at->at_default, NULL); art_table_free(ar, at); - return (dflt); + return (node); } /* @@ -455,12 +520,13 @@ art_delete(struct art_root *ar, struct a while (plen > (at->at_offset + at->at_bits)) { /* Do a single level route lookup. */ j = art_findex(at, addr); + node = srp_get_locked(&at->at_heap[j].node); /* If this is a leaf, there is no route to delete. */ - if (ISLEAF(at->at_heap[j])) + if (ISLEAF(node)) return (NULL); - at = SUBTABLE(at->at_heap[j]); + at = SUBTABLE(node); } i = art_bindex(at, addr, plen); @@ -475,28 +541,27 @@ art_delete(struct art_root *ar, struct a */ struct art_node * art_table_delete(struct art_root *ar, struct art_table *at, int i, - struct art_node *node) + struct art_node *an) { - struct art_node *next; + struct art_node *next, *node; #ifdef DIAGNOSTIC struct art_node *prev; +#endif - if (!ISLEAF(at->at_heap[i])) - prev = SUBTABLE(at->at_heap[i])->at_default; + node = srp_get_locked(&at->at_heap[i].node); +#ifdef DIAGNOSTIC + if (!ISLEAF(node)) + prev = srp_get_locked(&SUBTABLE(node)->at_default); else - prev = at->at_heap[i].node; + prev = node; - KASSERT(prev == node); + KASSERT(prev == an); #endif - /* We are removing an entry from this table. */ - if (art_table_free(ar, at)) - return (node); - /* Get the next most specific route for the index `i'. 
*/ if ((i >> 1) > 1) - next = at->at_heap[i >> 1].node; + next = srp_get_locked(&at->at_heap[i >> 1].node); else next = NULL; @@ -506,32 +571,46 @@ art_table_delete(struct art_root *ar, st * route pointer to all the corresponding fringe indices. */ if (i < at->at_minfringe) - art_allot(at, i, node, next); - else if (!ISLEAF(at->at_heap[i])) - SUBTABLE(at->at_heap[i])->at_default = next; + art_allot(at, i, an, next); + else if (!ISLEAF(node)) + srp_swap_locked(&SUBTABLE(node)->at_default, next); else - at->at_heap[i].node = next; + srp_swap_locked(&at->at_heap[i].node, next); - return (node); + art_table_free(ar, at); + + /* Caller must srp_finalize the node being deleted */ + return (an); } -void +struct art_table * art_table_ref(struct art_root *ar, struct art_table *at) { at->at_refcnt++; + + return (at); +} + +static inline int +art_table_rele(struct art_table *at) +{ + if (at == NULL) + return (0); + + return (--at->at_refcnt == 0); } int art_table_free(struct art_root *ar, struct art_table *at) { - if (--at->at_refcnt == 0) { + if (art_table_rele(at)) { /* * Garbage collect this table and all its parents * that are empty. */ do { at = art_table_put(ar, at); - } while (at != NULL && --at->at_refcnt == 0); + } while (art_table_rele(at)); return (1); } @@ -542,40 +621,65 @@ art_table_free(struct art_root *ar, stru /* * Iteration API function. */ + int art_walk(struct art_root *ar, int (*f)(struct art_node *, void *), void *arg) { + struct srp_ref sr; struct art_table *at; - int error; + struct art_node *node; + int error = 0; - at = ar->ar_root; - if (at == NULL) - return (0); + mtx_enter(&ar->ar_lock); + at = srp_get_locked(&ar->ar_root); + if (at != NULL) { + art_table_ref(ar, at); - /* - * The default route should be processed here because the root - * table does not have a parent. 
- */ - if (at->at_default != NULL) { - error = (*f)(at->at_default, arg); - if (error) - return (error); + /* + * The default route should be processed here because the root + * table does not have a parent. + */ + node = srp_enter(&sr, &at->at_default); + error = art_walk_apply(ar, node, NULL, f, arg); + srp_leave(&sr); + + if (error == 0) + error = art_table_walk(ar, at, f, arg); + + art_table_free(ar, at); + } + mtx_leave(&ar->ar_lock); + + return (error); +} + +int +art_walk_apply(struct art_root *ar, + struct art_node *an, struct art_node *next, + int (*f)(struct art_node *, void *), void *arg) +{ + int error = 0; + + if ((an != NULL) && (an != next)) { + /* this assumes an->an_dst is not used by f */ + mtx_leave(&ar->ar_lock); + error = (*f)(an, arg); + mtx_enter(&ar->ar_lock); } - return (art_table_walk(ar, at, f, arg)); + return (error); } int art_table_walk(struct art_root *ar, struct art_table *at, int (*f)(struct art_node *, void *), void *arg) { - struct art_node *next, *an = NULL; + struct srp_ref nodesr; + struct art_node *node, *next; + struct art_table *nat; int i, j, error = 0; uint32_t maxfringe = (at->at_minfringe << 1); - /* Prevent this table to be freed while we're manipulating it. */ - art_table_ref(ar, at); - /* * Iterate non-fringe nodes in ``natural'' order. */ @@ -586,13 +690,14 @@ art_table_walk(struct art_root *ar, stru * be processed more than once. */ for (i = max(j, 2); i < at->at_minfringe; i <<= 1) { - next = at->at_heap[i >> 1].node; - an = at->at_heap[i].node; - if ((an != NULL) && (an != next)) { - error = (*f)(an, arg); - if (error) - goto out; - } + next = srp_get_locked(&at->at_heap[i >> 1].node); + + node = srp_enter(&nodesr, &at->at_heap[i].node); + error = art_walk_apply(ar, node, next, f, arg); + srp_leave(&nodesr); + + if (error) + return (error); } } @@ -600,31 +705,34 @@ art_table_walk(struct art_root *ar, stru * Iterate fringe nodes. 
*/ for (i = at->at_minfringe; i < maxfringe; i++) { - next = at->at_heap[i >> 1].node; - if (!ISLEAF(at->at_heap[i])) - an = SUBTABLE(at->at_heap[i])->at_default; - else - an = at->at_heap[i].node; - if ((an != NULL) && (an != next)) { - error = (*f)(an, arg); - if (error) - goto out; - } + next = srp_get_locked(&at->at_heap[i >> 1].node); - if (ISLEAF(at->at_heap[i])) - continue; + node = srp_enter(&nodesr, &at->at_heap[i].node); + if (!ISLEAF(node)) { + nat = art_table_ref(ar, SUBTABLE(node)); + node = srp_follow(&nodesr, &nat->at_default); + } else + nat = NULL; - error = art_table_walk(ar, SUBTABLE(at->at_heap[i]), f, arg); - if (error) - break; + error = art_walk_apply(ar, node, next, f, arg); + srp_leave(&nodesr); + + if (error) { + art_table_free(ar, nat); + return (error); + } + + if (nat != NULL) { + error = art_table_walk(ar, nat, f, arg); + art_table_free(ar, nat); + if (error) + return (error); + } } -out: - art_table_free(ar, at); - return (error); + return (0); } - /* * Create a table and use the given index to set its default route. 
* @@ -634,6 +742,7 @@ struct art_table * art_table_get(struct art_root *ar, struct art_table *parent, int j) { struct art_table *at; + struct art_node *node; void *at_heap; uint32_t lvl; @@ -676,7 +785,9 @@ art_table_get(struct art_root *ar, struc at->at_refcnt = 0; if (parent != NULL) { - at->at_default = parent->at_heap[j].node; + node = srp_get_locked(&parent->at_heap[j].node); + /* node isn't being deleted, no srp_finalize needed */ + srp_swap_locked(&at->at_default, node); at->at_offset = (parent->at_offset + parent->at_bits); } @@ -693,36 +804,67 @@ struct art_table * art_table_put(struct art_root *ar, struct art_table *at) { struct art_table *parent = at->at_parent; - uint32_t lvl = at->at_level; uint32_t j = at->at_index; + MUTEX_ASSERT_LOCKED(&ar->ar_lock); + KASSERT(at->at_refcnt == 0); KASSERT(j != 0 && j != 1); - KASSERT(parent != NULL || j == -1); if (parent != NULL) { - KASSERT(lvl == parent->at_level + 1); + KASSERT(j != -1); + KASSERT(at->at_level == parent->at_level + 1); KASSERT(parent->at_refcnt >= 1); /* Give the route back to its parent. 
*/ - parent->at_heap[j].node = at->at_default; - } else { - ar->ar_root = NULL; - } + srp_swap_locked(&parent->at_heap[j].node, srp_get_locked(&at->at_default)); + } else { + KASSERT(j == -1); + KASSERT(at->at_level == 0); + srp_swap_locked(&ar->ar_root, NULL); + } + + mtx_enter(&art_table_gc_mtx); + at->at_parent = art_table_gc_list; + art_table_gc_list = at; + mtx_leave(&art_table_gc_mtx); + task_add(systqmp, &art_table_gc_task); - switch (AT_HEAPSIZE(at->at_bits)) { - case AT_HEAPSIZE(4): - pool_put(&at_heap_4_pool, at->at_heap); - break; - case AT_HEAPSIZE(8): - pool_put(&at_heap_8_pool, at->at_heap); - break; - default: - panic("incorrect stride length %u", at->at_bits); - } + return (parent); +} - pool_put(&at_pool, at); +void +art_table_gc(void *null) +{ + struct art_table *at, *next; - return (parent); + mtx_enter(&art_table_gc_mtx); + at = art_table_gc_list; + art_table_gc_list = NULL; + mtx_leave(&art_table_gc_mtx); + + while (at != NULL) { + next = at->at_parent; + + if (at->at_level == 0) + srp_finalize(at, "arttfini"); + else + srp_finalize(ASNODE(at), "arttfini"); + + switch (AT_HEAPSIZE(at->at_bits)) { + case AT_HEAPSIZE(4): + pool_put(&at_heap_4_pool, at->at_heap); + break; + case AT_HEAPSIZE(8): + pool_put(&at_heap_8_pool, at->at_heap); + break; + default: + panic("incorrect stride length %u", at->at_bits); + } + + pool_put(&at_pool, at); + + at = next; + } } /* @@ -752,7 +894,8 @@ void art_allot(struct art_table *at, int i, struct art_node *old, struct art_node *new) { - int k = i; + struct art_node *node, *dflt; + int k = i; KASSERT(i < at->at_minfringe); @@ -763,12 +906,17 @@ again: /* Change fringe nodes. 
*/ while (1) { - if (!ISLEAF(at->at_heap[k])) { - if (SUBTABLE(at->at_heap[k])->at_default == old) { - SUBTABLE(at->at_heap[k])->at_default = new; + node = srp_get_locked(&at->at_heap[k].node); + if (!ISLEAF(node)) { + dflt = srp_get_locked(&SUBTABLE(node)->at_default); + if (dflt == old) { + /* old node will be finalized later */ + srp_swap_locked(&SUBTABLE(node)->at_default, + new); } - } else if (at->at_heap[k].node == old) { - at->at_heap[k].node = new; + } else if (node == old) { + /* old node will be finalized later */ + srp_swap_locked(&at->at_heap[k].node, new); } if (k % 2) goto moveup; @@ -776,7 +924,8 @@ again: } nonfringe: - if (at->at_heap[k].node == old) + node = srp_get_locked(&at->at_heap[k].node); + if (node == old) goto again; moveon: if (k % 2) @@ -785,7 +934,11 @@ moveon: goto nonfringe; moveup: k >>= 1; - at->at_heap[k].node = new; + /* + * are we sure this is changing from old->new? + * old node is finalized by caller + */ + srp_swap_locked(&at->at_heap[k].node, new); /* Change non-fringe node. 
*/ if (k != i) @@ -803,6 +956,7 @@ art_get(struct sockaddr *dst, uint8_t pl an->an_dst = dst; an->an_plen = plen; + SRPL_INIT(&an->an_rtlist); return (an); } @@ -810,5 +964,32 @@ art_get(struct sockaddr *dst, uint8_t pl void art_put(struct art_node *an) { - pool_put(&an_pool, an); + KASSERT(SRPL_EMPTY_LOCKED(&an->an_rtlist)); + + mtx_enter(&art_node_gc_mtx); + an->an_gc = art_node_gc_list; + art_node_gc_list = an; + mtx_leave(&art_node_gc_mtx); + + task_add(systqmp, &art_node_gc_task); +} + +void +art_gc(void *null) +{ + struct art_node *an, *next; + + mtx_enter(&art_node_gc_mtx); + an = art_node_gc_list; + art_node_gc_list = NULL; + mtx_leave(&art_node_gc_mtx); + + while (an != NULL) { + next = an->an_gc; + + srp_finalize(an, "artnfini"); + pool_put(&an_pool, an); + + an = next; + } } Index: net/art.h =================================================================== RCS file: /cvs/src/sys/net/art.h,v retrieving revision 1.12 diff -u -p -r1.12 art.h --- net/art.h 13 Apr 2016 08:04:14 -0000 1.12 +++ net/art.h 30 May 2016 04:17:56 -0000 @@ -19,13 +19,18 @@ #ifndef _NET_ART_H_ #define _NET_ART_H_ +#include +#include +#include + #define ART_MAXLVL 32 /* We currently use 32 levels for IPv6. */ /* * Root of the ART tables, equivalent to the radix head. 
*/ struct art_root { - struct art_table *ar_root; /* First table */ + struct srp ar_root; /* First struct art_table */ + struct mutex ar_lock; /* Lock for update ops */ uint8_t ar_bits[ART_MAXLVL]; /* Per level stride */ uint8_t ar_nlvl; /* Number of levels */ uint8_t ar_alen; /* Address length in bits */ @@ -44,18 +49,25 @@ struct rtentry; */ struct art_node { SRPL_HEAD(, rtentry) an_rtlist; /* Route related to this node */ - struct sockaddr *an_dst; /* Destination address (key) */ + union { + struct sockaddr *an__dst; /* Destination address (key) */ + struct art_node *an__gc; /* Entry on GC list */ + } an_pointer; uint8_t an_plen; /* Prefix length */ }; +#define an_dst an_pointer.an__dst +#define an_gc an_pointer.an__gc + void art_init(void); struct art_root *art_alloc(unsigned int, unsigned int, unsigned int); struct art_node *art_insert(struct art_root *, struct art_node *, uint8_t *, int); struct art_node *art_delete(struct art_root *, struct art_node *, uint8_t *, int); -struct art_node *art_match(struct art_root *, uint8_t *); -struct art_node *art_lookup(struct art_root *, uint8_t *, int); +struct art_node *art_match(struct art_root *, uint8_t *, struct srp_ref *); +struct art_node *art_lookup(struct art_root *, uint8_t *, int, + struct srp_ref *); int art_walk(struct art_root *, int (*)(struct art_node *, void *), void *); Index: net/rtable.c =================================================================== RCS file: /cvs/src/sys/net/rtable.c,v retrieving revision 1.42 diff -u -p -r1.42 rtable.c --- net/rtable.c 18 May 2016 03:46:03 -0000 1.42 +++ net/rtable.c 30 May 2016 04:17:56 -0000 @@ -512,6 +512,9 @@ rtable_mpath_next(struct rtentry *rt) static inline uint8_t *satoaddr(struct art_root *, struct sockaddr *); +int art_mpath_reprio(struct art_root *, struct sockaddr *, int, uint8_t, + struct rtentry *); + void rtentry_ref(void *, void *); void rtentry_unref(void *, void *); @@ -536,7 +539,7 @@ rtable_lookup(unsigned int rtableid, str struct art_root 
*ar; struct art_node *an; struct rtentry *rt; - struct srp_ref sr; + struct srp_ref sr, nsr; uint8_t *addr; int plen; @@ -548,7 +551,7 @@ rtable_lookup(unsigned int rtableid, str /* No need for a perfect match. */ if (mask == NULL) { - an = art_match(ar, addr); + an = art_match(ar, addr, &nsr); if (an == NULL) return (NULL); } else { @@ -556,11 +559,16 @@ rtable_lookup(unsigned int rtableid, str if (plen == -1) return (NULL); - an = art_lookup(ar, addr, plen); + an = art_lookup(ar, addr, plen, &nsr); + if (an == NULL) + return (NULL); + /* Make sure we've got a perfect match. */ - if (an == NULL || an->an_plen != plen || - memcmp(an->an_dst, dst, dst->sa_len)) + if (an->an_plen != plen || memcmp(an->an_dst, dst, + dst->sa_len)) { + srp_leave(&nsr); return (NULL); + } } #ifdef SMALL_KERNEL @@ -580,12 +588,14 @@ rtable_lookup(unsigned int rtableid, str } if (rt == NULL) { SRPL_LEAVE(&sr); + srp_leave(&nsr); return (NULL); } #endif /* SMALL_KERNEL */ rtref(rt); SRPL_LEAVE(&sr); + srp_leave(&nsr); return (rt); } @@ -596,7 +606,7 @@ rtable_match(unsigned int rtableid, stru struct art_root *ar; struct art_node *an; struct rtentry *rt = NULL; - struct srp_ref sr; + struct srp_ref sr, nsr; uint8_t *addr; #ifndef SMALL_KERNEL int hash; @@ -608,8 +618,7 @@ rtable_match(unsigned int rtableid, stru addr = satoaddr(ar, dst); - KERNEL_LOCK(); - an = art_match(ar, addr); + an = art_match(ar, addr, &nsr); if (an == NULL) goto out; @@ -634,6 +643,16 @@ rtable_match(unsigned int rtableid, stru threshold = (0xffff / npaths) + 1; + /* + * we have no protection against concurrent modification of the + * route list attached to the node, so we won't necessarily + * have the same number of routes. for most modifications, + * we'll pick a route that we wouldn't have if we only saw the + * list before or after the change. if we were going to use + * the last available route, but it got removed, we'll hit + * the end of the list and then pick the first route. 
+ */ + mrt = SRPL_ENTER(&sr, &an->an_rtlist); while (hash > threshold && mrt != NULL) { if (mrt->rt_priority == rt->rt_priority) @@ -649,8 +668,8 @@ rtable_match(unsigned int rtableid, stru SRPL_LEAVE(&sr); } #endif /* SMALL_KERNEL */ + srp_leave(&nsr); out: - KERNEL_UNLOCK(); return (rt); } @@ -661,11 +680,14 @@ rtable_insert(unsigned int rtableid, str { #ifndef SMALL_KERNEL struct rtentry *mrt; + struct srp_ref sr; #endif /* SMALL_KERNEL */ struct art_root *ar; struct art_node *an, *prev; uint8_t *addr; int plen; + unsigned int rt_flags; + int error = 0; KERNEL_ASSERT_LOCKED(); @@ -678,44 +700,63 @@ rtable_insert(unsigned int rtableid, str if (plen == -1) return (EINVAL); + rtref(rt); /* guarantee rtfree wont do anything under ar_lock */ + mtx_enter(&ar->ar_lock); + #ifndef SMALL_KERNEL /* Do not permit exactly the same dst/mask/gw pair. */ - an = art_lookup(ar, addr, plen); - if (an != NULL && an->an_plen == plen && - !memcmp(an->an_dst, dst, dst->sa_len)) { - struct rtentry *mrt; - int mpathok = ISSET(rt->rt_flags, RTF_MPATH); - - SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) { - if (prio != RTP_ANY && - (mrt->rt_priority & RTP_MASK) != (prio & RTP_MASK)) - continue; - - if (!mpathok || - (mrt->rt_gateway->sa_len == gateway->sa_len && - !memcmp(mrt->rt_gateway, gateway, gateway->sa_len))){ - return (EEXIST); + an = art_lookup(ar, addr, plen, &sr); + if (an != NULL) { + srp_leave(&sr); /* an cannot go away while ar_lock is held */ + if (an->an_plen == plen && + !memcmp(an->an_dst, dst, dst->sa_len)) { + struct rtentry *mrt; + int mpathok = ISSET(rt->rt_flags, RTF_MPATH); + + SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) { + if (prio != RTP_ANY && + (mrt->rt_priority & RTP_MASK) != + (prio & RTP_MASK)) + continue; + + if (!mpathok || + (mrt->rt_gateway->sa_len == + gateway->sa_len && + !memcmp(mrt->rt_gateway, gateway, + gateway->sa_len))) { + error = EEXIST; + goto leave; + } } } } #endif /* SMALL_KERNEL */ an = art_get(dst, plen); - if (an == NULL) - 
return (ENOBUFS); + if (an == NULL) { + error = ENOBUFS; + goto leave; + } - SRPL_INIT(&an->an_rtlist); + /* prepare an for immediate operation if insert succeeds */ + rt_flags = rt->rt_flags; + rt->rt_flags &= ~RTF_MPATH; + rt->rt_dest = dst; + rt->rt_plen = plen; + SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next); prev = art_insert(ar, an, addr, plen); - if (prev == NULL) { + if (prev != an) { + SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, + rt_next); + rt->rt_flags = rt_flags; art_put(an); - return (ESRCH); - } - if (prev == an) { - rt->rt_flags &= ~RTF_MPATH; - } else { - art_put(an); + if (prev == NULL) { + error = ESRCH; + goto leave; + } + #ifndef SMALL_KERNEL an = prev; @@ -740,22 +781,19 @@ rtable_insert(unsigned int rtableid, str } } } -#else - return (EEXIST); -#endif /* SMALL_KERNEL */ - } - rt->rt_dest = dst; - rt->rt_plen = plen; - rtref(rt); - SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next); + SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next); -#ifndef SMALL_KERNEL - /* Put newly inserted entry at the right place. */ - rtable_mpath_reprio(rtableid, dst, mask, rt->rt_priority, rt); + /* Put newly inserted entry at the right place. 
*/ + art_mpath_reprio(ar, dst, plen, rt->rt_priority, rt); +#else + error = EEXIST; #endif /* SMALL_KERNEL */ - - return (0); + } +leave: + mtx_leave(&ar->ar_lock); + rtfree(rt); + return (error); } int @@ -764,6 +802,7 @@ rtable_delete(unsigned int rtableid, str { struct art_root *ar; struct art_node *an; + struct srp_ref sr; uint8_t *addr; int plen; #ifndef SMALL_KERNEL @@ -771,8 +810,6 @@ rtable_delete(unsigned int rtableid, str int npaths = 0; #endif /* SMALL_KERNEL */ - KERNEL_ASSERT_LOCKED(); - ar = rtable_get(rtableid, dst->sa_family); if (ar == NULL) return (EAFNOSUPPORT); @@ -780,11 +817,20 @@ rtable_delete(unsigned int rtableid, str addr = satoaddr(ar, dst); plen = rtable_satoplen(dst->sa_family, mask); - an = art_lookup(ar, addr, plen); + mtx_enter(&ar->ar_lock); + + an = art_lookup(ar, addr, plen, &sr); + if (an == NULL) { + mtx_leave(&ar->ar_lock); + return (ESRCH); + } + srp_leave(&sr); + /* Make sure we've got a perfect match. */ - if (an == NULL || an->an_plen != plen || - memcmp(an->an_dst, dst, dst->sa_len)) + if (an->an_plen != plen || memcmp(an->an_dst, dst, dst->sa_len)) { + mtx_leave(&ar->ar_lock); return (ESRCH); + } #ifndef SMALL_KERNEL /* @@ -795,27 +841,38 @@ rtable_delete(unsigned int rtableid, str npaths++; if (npaths > 1) { + rtref(rt); /* guarantee rtfree wont do anything under ar_lock */ KASSERT(rt->rt_refcnt >= 2); SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next); - rtfree(rt); mrt = SRPL_FIRST_LOCKED(&an->an_rtlist); an->an_dst = mrt->rt_dest; if (npaths == 2) mrt->rt_flags &= ~RTF_MPATH; + mtx_leave(&ar->ar_lock); + rtfree(rt); return (0); } #endif /* SMALL_KERNEL */ - if (art_delete(ar, an, addr, plen) == NULL) + if (art_delete(ar, an, addr, plen) == NULL) { + mtx_leave(&ar->ar_lock); return (ESRCH); + } + /* + * guarantee rtfree wont do anything under ar_lock, or before + * art_put() + */ + rtref(rt); KASSERT(rt->rt_refcnt >= 2); + SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next); + art_put(an); + 
mtx_leave(&ar->ar_lock); rtfree(rt); - art_put(an); return (0); } @@ -831,16 +888,16 @@ struct rtable_walk_cookie { int rtable_walk_helper(struct art_node *an, void *xrwc) { + struct srp_ref sr; struct rtable_walk_cookie *rwc = xrwc; - struct rtentry *rt, *nrt; + struct rtentry *rt; int error = 0; - KERNEL_ASSERT_LOCKED(); - - SRPL_FOREACH_SAFE_LOCKED(rt, &an->an_rtlist, rt_next, nrt) { + SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) { if ((error = (*rwc->rwc_func)(rt, rwc->rwc_arg, rwc->rwc_rid))) break; } + SRPL_LEAVE(&sr); return (error); } @@ -875,29 +932,31 @@ rtable_mpath_capable(unsigned int rtable } int -rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst, - struct sockaddr *mask, uint8_t prio, struct rtentry *rt) +art_mpath_reprio(struct art_root *ar, struct sockaddr *dst, int plen, + uint8_t prio, struct rtentry *rt) { - struct art_root *ar; struct art_node *an; + struct srp_ref sr; uint8_t *addr; - int plen; struct rtentry *mrt, *prt = NULL; - ar = rtable_get(rtableid, dst->sa_family); - if (ar == NULL) - return (EAFNOSUPPORT); - addr = satoaddr(ar, dst); - plen = rtable_satoplen(dst->sa_family, mask); + an = art_lookup(ar, addr, plen, &sr); + if (an == NULL) { + return (ESRCH); + } + + /* + * we hold enough locks to be sure the node isn't going away, + * so we don't need the srp. + */ + MUTEX_ASSERT_LOCKED(&ar->ar_lock); + srp_leave(&sr); - an = art_lookup(ar, addr, plen); /* Make sure we've got a perfect match. 
*/ - if (an == NULL || an->an_plen != plen || - memcmp(an->an_dst, dst, dst->sa_len)) + if (an->an_plen != plen || memcmp(an->an_dst, dst, dst->sa_len)) { return (ESRCH); - - KERNEL_ASSERT_LOCKED(); + } SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next); rt->rt_priority = prio; @@ -933,6 +992,26 @@ rtable_mpath_reprio(unsigned int rtablei } return (0); +} + +int +rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst, + struct sockaddr *mask, uint8_t prio, struct rtentry *rt) +{ + struct art_root *ar; + int plen, rv; + + ar = rtable_get(rtableid, dst->sa_family); + if (ar == NULL) + return (EAFNOSUPPORT); + + plen = rtable_satoplen(dst->sa_family, mask); + + mtx_enter(&ar->ar_lock); + rv = art_mpath_reprio(ar, dst, plen, prio, rt); + mtx_leave(&ar->ar_lock); + + return (rv); } struct rtentry * Index: sys/srp.h =================================================================== RCS file: /cvs/src/sys/sys/srp.h,v retrieving revision 1.9 diff -u -p -r1.9 srp.h --- sys/srp.h 18 May 2016 03:58:13 -0000 1.9 +++ sys/srp.h 30 May 2016 04:17:56 -0000 @@ -34,6 +34,8 @@ struct srp { struct srp_hazard { struct srp *sh_p; void *sh_v; + const char *sh_f; + unsigned int sh_l; }; struct srp_ref { @@ -55,6 +57,7 @@ struct srp_gc { void srp_startup(void); void srp_gc_init(struct srp_gc *, void (*)(void *, void *), void *); +void *srp_swap_locked(struct srp *, void *); void srp_update_locked(struct srp_gc *, struct srp *, void *); void *srp_get_locked(struct srp *); void srp_gc_finalize(struct srp_gc *); @@ -62,12 +65,18 @@ void srp_gc_finalize(struct srp_gc *); void srp_init(struct srp *); #ifdef MULTIPROCESSOR +void *srp_swap(struct srp *, void *); void srp_update(struct srp_gc *, struct srp *, void *); -void *srp_enter(struct srp_ref *, struct srp *); -void *srp_follow(struct srp_ref *, struct srp *); +void srp_finalize(void *, const char *); +void *_srp_enter(const char *, unsigned int, struct srp_ref *, struct srp *); +void *_srp_follow(const char *, 
unsigned int, struct srp_ref *, struct srp *); void srp_leave(struct srp_ref *); +#define srp_enter(_sr, _srp) _srp_enter(__func__, __LINE__, (_sr), (_srp)) +#define srp_follow(_sr, _srp) _srp_follow(__func__, __LINE__, (_sr), (_srp)) #else /* MULTIPROCESSOR */ +#define srp_swap(_srp, _v) srp_swap_locked((_srp), (_v)) #define srp_update(_gc, _srp, _v) srp_update_locked((_gc), (_srp), (_v)) +#define srp_finalize(_v, _wchan) ((void)0) #define srp_enter(_sr, _srp) ((_srp)->ref) #define srp_follow(_sr, _srp) ((_srp)->ref) #define srp_leave(_sr) do { } while (0)