Index: net/rtable.c =================================================================== RCS file: /cvs/src/sys/net/rtable.c,v retrieving revision 1.50 diff -u -p -r1.50 rtable.c --- net/rtable.c 19 Jul 2016 10:51:44 -0000 1.50 +++ net/rtable.c 29 Aug 2016 11:47:39 -0000 @@ -510,16 +510,607 @@ rtable_mpath_next(struct rtentry *rt) #else /* ART */ -static inline uint8_t *satoaddr(struct art_root *, struct sockaddr *); +#include +#include -void rtentry_ref(void *, void *); -void rtentry_unref(void *, void *); +static inline uint8_t *satoaddr(const struct art_root *, struct sockaddr *); -struct srpl_rc rt_rc = SRPL_RC_INITIALIZER(rtentry_ref, rtentry_unref, NULL); +static inline int +sockaddreq(const struct sockaddr *a, const struct sockaddr *b) +{ + return (a->sa_len == b->sa_len && memcmp(a, b, a->sa_len) == 0); +} + +#define RTLIST_ENTRIES 4 /* must be 4 or more */ + +struct rtlist { + unsigned long rl_map[RTLIST_ENTRIES]; /* must be first */ + SLIST_ENTRY(rtlist) rl_gc; +}; +SLIST_HEAD(rtllist, rtlist); + +static inline int +rtlist_is_list(unsigned long entry) +{ + return (entry & 1); +} + +static inline struct rtentry * +rtlist_rt(unsigned long entry) +{ + return ((struct rtentry *)entry); +} + +static inline struct rtlist * +rtlist_list(unsigned long entry) +{ + entry &= ~1UL; + return ((struct rtlist *)entry); +} + +static inline unsigned long * +rtlist_map(unsigned long entry) +{ + entry &= ~1UL; + return ((unsigned long *)entry); +} + +static inline unsigned long +rtlist_map_list(struct rtlist *rl) +{ + unsigned long entry = (unsigned long)rl; + return (entry | 1); +} + +static inline unsigned long +rtlist_map_rt(struct rtentry *rt) +{ + rtref(rt); + return ((unsigned long)rt); +} + +void rtlist_put(struct rtlist *); +void rtlist_gc(void *); + +struct rtlist_ptr { + unsigned long *map; + unsigned int i; +}; + +int rtlist_add(struct rtlist_iter *, struct rtentry *); +int rtlist_insert(struct art_node *, struct rtentry *); +int rtlist_delete(struct art_node *, struct rtentry *); +int rtlist_change(struct art_node *, struct rtentry *, uint8_t); + +unsigned int + rtpaths_init(struct rtpaths *, unsigned long); + +struct pool rtlist_pool; + +struct mutex rtlist_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); +struct rtllist rtlist_gc_list = SLIST_HEAD_INITIALIZER(&rtlist_gc_list); +struct task rtlist_gc_task = TASK_INITIALIZER(rtlist_gc, NULL); + +static inline struct rtlist * +rtlist_get(void) +{ + return (pool_get(&rtlist_pool, PR_NOWAIT | PR_ZERO)); +} + +void +rtlist_put(struct rtlist *rl) +{ + mtx_enter(&rtlist_gc_mtx); + SLIST_INSERT_HEAD(&rtlist_gc_list, rl, rl_gc); + mtx_leave(&rtlist_gc_mtx); + + task_add(systqmp, &rtlist_gc_task); +} + +void +rtlist_gc(void *null) +{ + struct rtllist rll; + struct rtlist *rl, *chain; + int i; + + mtx_enter(&rtlist_gc_mtx); + rll = rtlist_gc_list; + SLIST_INIT(&rtlist_gc_list); + mtx_leave(&rtlist_gc_mtx); + + while ((rl = SLIST_FIRST(&rll)) != NULL) { + SLIST_REMOVE_HEAD(&rll, rl_gc); + + srp_finalize((void *)rtlist_map_list(rl), "rtlfini"); + + i = 1; /* skip count in the first slot */ + do { + do { + KASSERT(!rtlist_is_list(rl->rl_map[i])); + rtfree(rtlist_rt(rl->rl_map[i])); + } while (++i < nitems(rl->rl_map) - 1); + + if (rtlist_is_list(rl->rl_map[i])) { + chain = rtlist_list(rl->rl_map[i]); + i = 0; + } else { + chain = NULL; + rtfree(rtlist_rt(rl->rl_map[i])); + } + + pool_put(&rtlist_pool, rl); + + rl = chain; + } while (rl != NULL); + } +} + +static inline struct rtentry * +rtlist_cur(struct rtlist_iter *ri) +{ + return 
(rtlist_rt(ri->ri_map[ri->ri_i])); +} + +static inline struct rtentry * +rtpaths_cur(struct rtpaths *rp) +{ + return (rtlist_cur(&rp->rp_i)); +} + +struct rtentry * +rtlist_next(struct rtlist_iter *ri) +{ + ri->ri_i++; + if (ri->ri_i >= RTLIST_ENTRIES) + return (NULL); + + if (rtlist_is_list(ri->ri_map[ri->ri_i])) { + ri->ri_map = rtlist_map(ri->ri_map[ri->ri_i]); + ri->ri_i = 0; + } + + return (rtlist_cur(ri)); +} + +int +rtlist_add(struct rtlist_iter *ri, struct rtentry *rt) +{ + if (ri->ri_i >= RTLIST_ENTRIES) { + struct rtlist *rl; + + rl = rtlist_get(); + if (rl == NULL) + return (ENOBUFS); + + rl->rl_map[0] = ri->ri_map[RTLIST_ENTRIES - 1]; + ri->ri_map[RTLIST_ENTRIES - 1] = rtlist_map_list(rl); + + ri->ri_map = rl->rl_map; + ri->ri_i = 1; + } + + ri->ri_map[ri->ri_i] = rtlist_map_rt(rt); + ri->ri_i++; + + return (0); +} + +int +rtlist_insert(struct art_node *an, struct rtentry *rt) +{ + struct rtlist *rl; + struct rtentry *mrt; + struct rtentry *prt = NULL; + struct rtpaths rp; + struct rtlist_iter nri; + unsigned long route; + unsigned int count; + int error; + + rl = rtlist_get(); + if (rl == NULL) + return (ENOBUFS); + + nri.ri_map = rl->rl_map; + nri.ri_i = 1; /* skip head count */ + + route = (unsigned long)srp_get_locked(&an->an_route); + count = rtpaths_init(&rp, route); + + mrt = rtpaths_cur(&rp); + if (rt->rt_priority > mrt->rt_priority) + count = 1; + else if (rt->rt_priority == mrt->rt_priority) + count++; + + do { + if (rt->rt_priority > mrt->rt_priority || + memcmp(rt->rt_gateway, mrt->rt_gateway, + rt->rt_gateway->sa_len) < 0) + break; + + error = rtlist_add(&nri, mrt); + if (error != 0) + goto fail; + + if (prt == NULL && rt->rt_priority == mrt->rt_priority) + prt = mrt; + + mrt = rtpaths_next(&rp); + } while (mrt != NULL); + + error = rtlist_add(&nri, rt); + if (error != 0) + goto fail; + + while (mrt != NULL) { + error = rtlist_add(&nri, mrt); + if (error != 0) + goto fail; + + if (prt == NULL && rt->rt_priority == mrt->rt_priority) + prt = mrt; + + mrt = rtpaths_next(&rp); + } + + /* we're committed to replacing the list now */ + + rl->rl_map[0] = count; + + if (prt != NULL) + SET(prt->rt_flags, RTF_MPATH); + else + CLR(rt->rt_flags, RTF_MPATH); + + srp_swap_locked(&an->an_route, (void *)rtlist_map_list(rl)); + if (rtlist_is_list(route)) + rtlist_put(rtlist_list(route)); + else + rtfree(rtlist_rt(route)); + + return (0); + +fail: + rtlist_put(rl); + return (error); +} + +int +rtlist_delete(struct art_node *an, struct rtentry *rt) +{ + struct rtlist *rl; + struct rtlist_iter ri; + struct rtlist_iter nri; + struct rtlist_iter pri = { NULL, 0 }; + unsigned long route; + struct rtentry *mrt; + uint8_t prio; + unsigned long count; + int error; + + route = (unsigned long)srp_get_locked(&an->an_route); + + /* is this the last route? */ + if (rtlist_rt(route) == rt) { + srp_swap_locked(&an->an_route, NULL); + rtfree(rtlist_rt(route)); + return (ESHUTDOWN); + } + KASSERT(rtlist_is_list(route)); /* XXX */ + + ri.ri_map = rtlist_map(route); + ri.ri_i = 1; /* skip count */ + + /* are we leaving one route in the list? 
*/ + if (ri.ri_map[3] == 0) { + struct rtentry *mrt; + + mrt = rtlist_rt(ri.ri_map[1]); + if (mrt == rt) + mrt = rtlist_rt(ri.ri_map[2]); + + an->an_dst = mrt->rt_dest; + + CLR(mrt->rt_flags, RTF_MPATH); + srp_swap_locked(&an->an_route, (void *)rtlist_map_rt(mrt)); + + rtlist_put(rtlist_list(route)); + return (0); + } + + /* build a new list */ + rl = rtlist_get(); + if (rl == NULL) + return (ENOBUFS); + + nri.ri_map = rl->rl_map; + nri.ri_i = 1; /* skip count */ + + mrt = rtlist_cur(&ri); + do { + if (rt != mrt) { + error = rtlist_add(&nri, mrt); + if (error != 0) + goto fail; + } + + mrt = rtlist_next(&ri); + } while (mrt != NULL); + + /* we're committed to replacing the list now */ + + nri.ri_map = rl->rl_map; + nri.ri_i = 1; + + mrt = rtlist_cur(&nri); + an->an_dst = mrt->rt_dest; + + prio = mrt->rt_priority; + count = 1; + + for (;;) { + if (pri.ri_map == NULL && rt->rt_priority == mrt->rt_priority) + pri = nri; + + mrt = rtlist_next(&nri); + if (mrt == NULL) + break; + + if (mrt->rt_priority == prio) + count++; + } + + rl->rl_map[0] = count; + + if (pri.ri_map != NULL) { + struct rtentry *nrt; + + mrt = rtlist_cur(&pri); + nrt = rtlist_next(&pri); + if (nrt == NULL || mrt->rt_priority != nrt->rt_priority) { + /* mrt is the last route at this priority */ + CLR(mrt->rt_flags, RTF_MPATH); + } + } + + srp_swap_locked(&an->an_route, (void *)rtlist_map_list(rl)); + rtlist_put(rtlist_list(route)); + + return (0); + +fail: + rtlist_put(rl); + return (error); +} + +int +rtlist_change(struct art_node *an, struct rtentry *rt, uint8_t prio) +{ + struct rtlist *rl; + struct rtlist_iter ri; + struct rtlist_iter nri; + struct rtlist_iter pri = { NULL, 0 }; + unsigned long route; + struct rtentry *mrt; + struct rtentry *prt; + unsigned int count; + int error; + + /* no change? */ + if (rt->rt_priority == prio) + return (0); + + route = (unsigned long)srp_get_locked(&an->an_route); + + /* is this the only route? 
*/
+	if (rtlist_rt(route) == rt) {
+		rt->rt_priority = prio;
+		return (0);
+	}
+	KASSERT(rtlist_is_list(route));	/* XXX */
+
+	/* need to make a new list */
+	rl = rtlist_get();
+	if (rl == NULL)
+		return (ENOBUFS);
+
+	ri.ri_map = rtlist_map(route);
+	ri.ri_i = 1; /* skip count */
+	count = ri.ri_map[0];
+
+	nri.ri_map = rl->rl_map;
+	nri.ri_i = 1; /* skip head count */
+
+	mrt = rtlist_cur(&ri);
+	if (mrt->rt_priority < prio)
+		rl->rl_map[0] = 1;
+	else if (mrt->rt_priority == prio)
+		rl->rl_map[0] = count + 1;
+	else if (mrt->rt_priority == rt->rt_priority)
+		rl->rl_map[0] = count - 1;
+	do {
+		if (mrt != rt) {
+			if (prio > mrt->rt_priority ||
+			    memcmp(rt->rt_gateway, mrt->rt_gateway,
+			    rt->rt_gateway->sa_len) < 0)
+				break;
+
+			error = rtlist_add(&nri, mrt);
+			if (error != 0)
+				goto fail;
+		}
+
+		mrt = rtlist_next(&ri);
+	} while (mrt != NULL);
+
+	error = rtlist_add(&nri, rt);
+	if (error != 0)
+		goto fail;
+
+	while (mrt != NULL) {
+		if (mrt != rt) {
+			error = rtlist_add(&nri, mrt);
+			if (error != 0)
+				goto fail;
+		}
+
+		mrt = rtlist_next(&ri);
+	}
+
+	/* we're committed to replacing the list now */
+
+	nri.ri_map = rl->rl_map;
+	nri.ri_i = 1;
+	prt = NULL;
+	mrt = rtlist_cur(&nri);
+	do {
+		if (mrt != rt) {
+			/* keep track of the first route with the old pri */
+			if (pri.ri_map == NULL &&
+			    mrt->rt_priority == rt->rt_priority)
+				pri = nri;
+
+			/* find the first route with the new pri */
+			if (prt == NULL &&
+			    mrt->rt_priority == prio)
+				prt = mrt;
+		}
+
+		mrt = rtlist_next(&nri);
+	} while (mrt != NULL);
+
+	rt->rt_priority = prio;
+
+	if (pri.ri_map != NULL) {
+		struct rtentry *nrt;
+
+		mrt = rtlist_cur(&pri);
+		nrt = rtlist_next(&pri);
+		if (nrt == NULL || nrt->rt_priority != mrt->rt_priority) {
+			/* this is the last one at this prio level */
+			CLR(mrt->rt_flags, RTF_MPATH);
+		}
+	}
+
+	if (prt != NULL) {
+		SET(rt->rt_flags, RTF_MPATH);
+		SET(prt->rt_flags, RTF_MPATH);
+	} else
+		CLR(rt->rt_flags, RTF_MPATH);
+
+	srp_swap_locked(&an->an_route, (void *)rtlist_map_list(rl));
+	rtlist_put(rtlist_list(route));
+
+	return (0);
+
+fail:
+	rtlist_put(rl);
+	return (error);
+}
+
+unsigned int
+rtpaths_init(struct rtpaths *rp, unsigned long route)
+{
+	unsigned int count = 1;
+
+	rp->rp_entries[0] = route;
+	if (rtlist_is_list(rp->rp_entries[0])) {
+		rp->rp_i.ri_map = rtlist_map(rp->rp_entries[0]);
+		/* 0th slot contains the number of high pri routes */
+		rp->rp_i.ri_i = 1;
+		count = rp->rp_i.ri_map[0];
+	} else {
+		rp->rp_entries[1] = 0UL;
+		rp->rp_i.ri_map = rp->rp_entries;
+		rp->rp_i.ri_i = 0;
+	}
+
+	return (count);
+}
+
+struct rtentry *
+rtpaths_match(struct rtpaths *rp, unsigned int rtableid, struct sockaddr *dst)
+{
+	struct art_root *ar;
+	struct art_node *an;
+
+	rp->rp_entries[0] = 0; /* guard srp_leave in rtpaths_leave */
+
+	ar = rtable_get(rtableid, dst->sa_family);
+	if (ar == NULL)
+		return (NULL);
+
+	an = art_match(ar, satoaddr(ar, dst), &rp->rp_sr);
+	if (an == NULL) {
+		srp_leave(&rp->rp_sr);
+		return (NULL);
+	}
+
+	rtpaths_init(rp, (unsigned long)srp_follow(&rp->rp_sr, &an->an_route));
+
+	return (rtpaths_cur(rp));
+}
+
+struct rtentry *
+rtpaths_lookup(struct rtpaths *rp, unsigned int rtableid,
+    struct sockaddr *dst, struct sockaddr *mask)
+{
+	struct art_root *ar;
+	struct art_node *an;
+	int plen;
+
+	rp->rp_entries[0] = 0; /* guard srp_leave in rtpaths_leave */
+
+	ar = rtable_get(rtableid, dst->sa_family);
+	if (ar == NULL)
+		return (NULL);
+
+	plen = rtable_satoplen(dst->sa_family, mask);
+	if (plen == -1)
+		return (NULL);
+
+	an = art_lookup(ar, satoaddr(ar, dst), plen,
&rp->rp_sr); + + /* Make sure we've got a perfect match. */ + if (an == NULL || an->an_plen != plen || + memcmp(an->an_dst, dst, dst->sa_len) != 0) { + srp_leave(&rp->rp_sr); + return (NULL); + } + + rtpaths_init(rp, (unsigned long)srp_follow(&rp->rp_sr, &an->an_route)); + + return (rtlist_cur(&rp->rp_i)); +} + +struct rtentry * +rtpaths_next(struct rtpaths *rp) +{ +#ifdef SMALL_KERNEL + KASSERT(rtlist_next(&rp->rp_i) == NULL); + return (NULL); +#else + return (rtlist_next(&rp->rp_i)); +#endif +} + +void +rtpaths_leave(struct rtpaths *rp) +{ + if (rp->rp_entries[0] == 0) + return; + + srp_leave(&rp->rp_sr); +} void rtable_init_backend(unsigned int keylen) { + pool_init(&rtlist_pool, sizeof(struct rtlist), 0, 0, 0, "rtlist", NULL); + pool_setipl(&rtlist_pool, IPL_SOFTNET); + art_init(); } @@ -533,59 +1124,30 @@ struct rtentry * rtable_lookup(unsigned int rtableid, struct sockaddr *dst, struct sockaddr *mask, struct sockaddr *gateway, uint8_t prio) { - struct art_root *ar; - struct art_node *an; struct rtentry *rt = NULL; - struct srp_ref sr, nsr; - uint8_t *addr; - int plen; - - ar = rtable_get(rtableid, dst->sa_family); - if (ar == NULL) - return (NULL); - - addr = satoaddr(ar, dst); + struct rtpaths rp; /* No need for a perfect match. */ - if (mask == NULL) { - an = art_match(ar, addr, &nsr); - if (an == NULL) - goto out; - } else { - plen = rtable_satoplen(dst->sa_family, mask); - if (plen == -1) - return (NULL); - - an = art_lookup(ar, addr, plen, &nsr); - - /* Make sure we've got a perfect match. */ - if (an == NULL || an->an_plen != plen || - memcmp(an->an_dst, dst, dst->sa_len)) - goto out; - } + if (mask == NULL) + rt = rtpaths_match(&rp, rtableid, dst); + else + rt = rtpaths_lookup(&rp, rtableid, dst, mask); -#ifdef SMALL_KERNEL - rt = SRPL_ENTER(&sr, &an->an_rtlist); -#else - SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) { + if (rt == NULL) + return (NULL); + + do { if (prio != RTP_ANY && - (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK)) + ((rt->rt_priority & RTP_MASK) != (prio & RTP_MASK))) continue; - if (gateway == NULL) - break; - - if (rt->rt_gateway->sa_len == gateway->sa_len && - memcmp(rt->rt_gateway, gateway, gateway->sa_len) == 0) + if (gateway == NULL || sockaddreq(rt->rt_gateway, gateway)) { + rtref(rt); break; - } -#endif /* SMALL_KERNEL */ - if (rt != NULL) - rtref(rt); + } + } while ((rt = rtpaths_next(&rp)) != NULL); - SRPL_LEAVE(&sr); -out: - srp_leave(&nsr); + rtpaths_leave(&rp); return (rt); } @@ -594,72 +1156,59 @@ struct rtentry * rtable_match(unsigned int rtableid, struct sockaddr *dst, uint32_t *src) { struct art_root *ar; + struct srp_ref sr; struct art_node *an; struct rtentry *rt = NULL; - struct srp_ref sr, nsr; - uint8_t *addr; -#ifndef SMALL_KERNEL - int hash; -#endif /* SMALL_KERNEL */ + unsigned long entry; ar = rtable_get(rtableid, dst->sa_family); if (ar == NULL) return (NULL); - addr = satoaddr(ar, dst); - - an = art_match(ar, addr, &nsr); + an = art_match(ar, satoaddr(ar, dst), &sr); if (an == NULL) goto out; - rt = SRPL_ENTER(&sr, &an->an_rtlist); - rtref(rt); - SRPL_LEAVE(&sr); - -#ifndef SMALL_KERNEL - /* Gateway selection by Hash-Threshold (RFC 2992) */ - if ((hash = rt_hash(rt, dst, src)) != -1) { - struct rtentry *mrt; - int threshold, npaths = 0; - - KASSERT(hash <= 0xffff); - - SRPL_FOREACH(mrt, &sr, &an->an_rtlist, rt_next) { - /* Only count nexthops with the same priority. 
*/
-			if (mrt->rt_priority == rt->rt_priority)
-				npaths++;
-		}
-		SRPL_LEAVE(&sr);
-
-		threshold = (0xffff / npaths) + 1;
+	entry = (unsigned long)srp_follow(&sr, &an->an_route);
+#ifdef SMALL_KERNEL
+	rt = rtlist_rt(entry);
+#else /* SMALL_KERNEL */
+	if (!rtlist_is_list(entry))
+		rt = rtlist_rt(entry);
+	else {
+		unsigned long *map;
+		int i;
+		int hash, threshold, npaths;
+
+		map = rtlist_map(entry);
+		i = 1;
+
+		rt = rtlist_rt(map[i]);
+
+		npaths = map[0];
+		if (npaths > 1 && (hash = rt_hash(rt, dst, src)) != -1) {
+			KASSERT(hash <= 0xffff);
+
+			threshold = (0xffff / npaths) + 1;
+
+			while (hash > threshold) {
+				i++;
+				if (rtlist_is_list(map[i])) {
+					map = rtlist_map(map[i]);
+					i = 0;
+				}
-		/*
-		 * we have no protection against concurrent modification of the
-		 * route list attached to the node, so we won't necessarily
-		 * have the same number of routes. for most modifications,
-		 * we'll pick a route that we wouldn't have if we only saw the
-		 * list before or after the change. if we were going to use
-		 * the last available route, but it got removed, we'll hit
-		 * the end of the list and then pick the first route.
-		 */
-
-		mrt = SRPL_ENTER(&sr, &an->an_rtlist);
-		while (hash > threshold && mrt != NULL) {
-			if (mrt->rt_priority == rt->rt_priority)
 				hash -= threshold;
-			mrt = SRPL_NEXT(&sr, mrt, rt_next);
-		}
+			}
-		if (mrt != NULL) {
-			rtref(mrt);
-			rtfree(rt);
-			rt = mrt;
+			rt = rtlist_rt(map[i]);
 		}
-		SRPL_LEAVE(&sr);
 	}
 #endif /* SMALL_KERNEL */
+	KASSERT(rt != NULL);
+	rtref(rt);
 out:
-	srp_leave(&nsr);
+	srp_leave(&sr);
 	return (rt);
 }
@@ -669,7 +1218,6 @@ rtable_insert(unsigned int rtableid, str
 	struct rtentry *rt)
 {
 #ifndef SMALL_KERNEL
-	struct rtentry *mrt;
 	struct srp_ref sr;
 #endif /* SMALL_KERNEL */
 	struct art_root *ar;
@@ -679,8 +1227,6 @@ rtable_insert(unsigned int rtableid, str
 	unsigned int rt_flags;
 	int error = 0;
 
-	KERNEL_ASSERT_LOCKED();
-
 	ar = rtable_get(rtableid, dst->sa_family);
 	if (ar == NULL)
 		return (EAFNOSUPPORT);
@@ -690,29 +1236,29 @@ rtable_insert(unsigned int rtableid, str
 	if (plen == -1)
 		return (EINVAL);
 
-	rtref(rt); /* guarantee rtfree won't do anything during insert */
-
 #ifndef SMALL_KERNEL
 	/* Do not permit exactly the same dst/mask/gw pair.
*/ an = art_lookup(ar, addr, plen, &sr); srp_leave(&sr); /* an can't go away while we have the lock */ - if (an != NULL && an->an_plen == plen && - !memcmp(an->an_dst, dst, dst->sa_len)) { + + if (an != NULL && an->an_plen == plen && sockaddreq(an->an_dst, dst)) { struct rtentry *mrt; int mpathok = ISSET(rt->rt_flags, RTF_MPATH); + struct rtpaths rp; + + rtpaths_init(&rp, (unsigned long)srp_get_locked(&an->an_route)); - SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) { + mrt = rtlist_cur(&rp.rp_i); + do { if (prio != RTP_ANY && (mrt->rt_priority & RTP_MASK) != (prio & RTP_MASK)) continue; - if (!mpathok || - (mrt->rt_gateway->sa_len == gateway->sa_len && - !memcmp(mrt->rt_gateway, gateway, gateway->sa_len))){ + if (!mpathok || sockaddreq(mrt->rt_gateway, gateway)) { error = EEXIST; goto leave; } - } + } while ((mrt = rtlist_next(&rp.rp_i)) != NULL); } #endif /* SMALL_KERNEL */ @@ -727,12 +1273,12 @@ rtable_insert(unsigned int rtableid, str rt->rt_flags &= ~RTF_MPATH; rt->rt_dest = dst; rt->rt_plen = plen; - SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next); + + srp_swap_locked(&an->an_route, (void *)rtlist_map_rt(rt)); prev = art_insert(ar, an, addr, plen); if (prev != an) { - SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, - rt_next); + rtfree(rt); rt->rt_flags = rt_flags; art_put(an); @@ -742,40 +1288,12 @@ rtable_insert(unsigned int rtableid, str } #ifndef SMALL_KERNEL - an = prev; - - mrt = SRPL_FIRST_LOCKED(&an->an_rtlist); - KASSERT(mrt != NULL); - KASSERT((rt->rt_flags & RTF_MPATH) || mrt->rt_priority != prio); - - /* - * An ART node with the same destination/netmask already - * exists, MPATH conflict must have been already checked. - */ - if (rt->rt_flags & RTF_MPATH) { - /* - * Only keep the RTF_MPATH flag if two routes have - * the same gateway. - */ - rt->rt_flags &= ~RTF_MPATH; - SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) { - if (mrt->rt_priority == prio) { - mrt->rt_flags |= RTF_MPATH; - rt->rt_flags |= RTF_MPATH; - } - } - } - - SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next); - - /* Put newly inserted entry at the right place. */ - rtable_mpath_reprio(rtableid, dst, mask, rt->rt_priority, rt); + error = rtlist_insert(prev, rt); #else error = EEXIST; #endif /* SMALL_KERNEL */ } leave: - rtfree(rt); return (error); } @@ -788,10 +1306,7 @@ rtable_delete(unsigned int rtableid, str struct srp_ref sr; uint8_t *addr; int plen; -#ifndef SMALL_KERNEL - struct rtentry *mrt; - int npaths = 0; -#endif /* SMALL_KERNEL */ + int error; ar = rtable_get(rtableid, dst->sa_family); if (ar == NULL) @@ -806,39 +1321,24 @@ rtable_delete(unsigned int rtableid, str srp_leave(&sr); /* an can't go away while we have the lock */ /* Make sure we've got a perfect match. */ - if (an == NULL || an->an_plen != plen || - memcmp(an->an_dst, dst, dst->sa_len)) + if (an == NULL || an->an_plen != plen || !sockaddreq(an->an_dst, dst)) return (ESRCH); -#ifndef SMALL_KERNEL - /* - * If other multipath route entries are still attached to - * this ART node we only have to unlink it. 
- */ - SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) - npaths++; - - if (npaths > 1) { - KASSERT(rt->rt_refcnt >= 1); - SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, - rt_next); - - mrt = SRPL_FIRST_LOCKED(&an->an_rtlist); - an->an_dst = mrt->rt_dest; - if (npaths == 2) - mrt->rt_flags &= ~RTF_MPATH; - return (0); + error = rtlist_delete(an, rt); + switch (error) { + case ESHUTDOWN: + if (art_delete(ar, an, addr, plen) == NULL) + panic("art_delete node gone under lock"); + art_put(an); + error = 0; + break; + case 0: + break; + default: + break; } -#endif /* SMALL_KERNEL */ - - if (art_delete(ar, an, addr, plen) == NULL) - return (ESRCH); - KASSERT(rt->rt_refcnt >= 1); - SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next); - - art_put(an); - return (0); + return (error); } struct rtable_walk_cookie { @@ -853,16 +1353,23 @@ struct rtable_walk_cookie { int rtable_walk_helper(struct art_node *an, void *xrwc) { - struct srp_ref sr; struct rtable_walk_cookie *rwc = xrwc; + struct rtpaths rp; struct rtentry *rt; int error = 0; - SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) { - if ((error = (*rwc->rwc_func)(rt, rwc->rwc_arg, rwc->rwc_rid))) + rtpaths_init(&rp, (unsigned long)srp_enter(&rp.rp_sr, &an->an_route)); + + rt = rtlist_cur(&rp.rp_i); + do { + error = (*rwc->rwc_func)(rt, rwc->rwc_arg, rwc->rwc_rid); + if (error) break; - } - SRPL_LEAVE(&sr); + + rt = rtlist_next(&rp.rp_i); + } while (rt != NULL); + + srp_leave(&rp.rp_sr); return (error); } @@ -905,7 +1412,7 @@ rtable_mpath_reprio(unsigned int rtablei struct srp_ref sr; uint8_t *addr; int plen; - struct rtentry *mrt, *prt = NULL; + int error = 0; ar = rtable_get(rtableid, dst->sa_family); if (ar == NULL) @@ -920,79 +1427,22 @@ rtable_mpath_reprio(unsigned int rtablei srp_leave(&sr); /* an can't go away while we have the lock */ /* Make sure we've got a perfect match. */ - if (an == NULL || an->an_plen != plen || - memcmp(an->an_dst, dst, dst->sa_len)) - return (ESRCH); - - rtref(rt); /* keep rt alive in between remove and add */ - SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next); - rt->rt_priority = prio; - - if ((mrt = SRPL_FIRST_LOCKED(&an->an_rtlist)) != NULL) { - /* - * Select the order of the MPATH routes. - */ - while (SRPL_NEXT_LOCKED(mrt, rt_next) != NULL) { - if (mrt->rt_priority > prio) - break; - prt = mrt; - mrt = SRPL_NEXT_LOCKED(mrt, rt_next); - } - - if (mrt->rt_priority > prio) { - /* - * ``rt'' has a higher (smaller) priority than - * ``mrt'' so put it before in the list. - */ - if (prt != NULL) { - SRPL_INSERT_AFTER_LOCKED(&rt_rc, prt, rt, - rt_next); - } else { - SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, - rt, rt_next); - } - } else { - SRPL_INSERT_AFTER_LOCKED(&rt_rc, mrt, rt, rt_next); - } - } else { - SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next); - } - rtfree(rt); - - return (0); -} + if (an == NULL || !sockaddreq(an->an_dst, dst)) + error = ESRCH; + else + error = rtlist_change(an, rt, prio); -struct rtentry * -rtable_mpath_next(struct rtentry *rt) -{ - KERNEL_ASSERT_LOCKED(); - return (SRPL_NEXT_LOCKED(rt, rt_next)); + return (error); } #endif /* SMALL_KERNEL */ -void -rtentry_ref(void *null, void *xrt) -{ - struct rtentry *rt = xrt; - - rtref(rt); -} - -void -rtentry_unref(void *null, void *xrt) -{ - struct rtentry *rt = xrt; - - rtfree(rt); -} - /* * Return a pointer to the address (key). This is an heritage from the * BSD radix tree needed to skip the non-address fields from the flavor * of "struct sockaddr" used by this routing table. 
*/ static inline uint8_t * -satoaddr(struct art_root *at, struct sockaddr *sa) +satoaddr(const struct art_root *at, struct sockaddr *sa) { return (((uint8_t *)sa) + at->ar_off); } Index: net/rtable.h =================================================================== RCS file: /cvs/src/sys/net/rtable.h,v retrieving revision 1.15 diff -u -p -r1.15 rtable.h --- net/rtable.h 18 Jan 2016 15:38:52 -0000 1.15 +++ net/rtable.h 29 Aug 2016 11:47:39 -0000 @@ -46,6 +46,10 @@ #define rt_plen(rt) ((rt)->rt_plen) #define RT_ROOT(rt) (0) +struct rtlist_iter { + unsigned long *ri_map; + unsigned int ri_i; +}; #endif /* ART */ int rtable_satoplen(sa_family_t, struct sockaddr *); @@ -73,4 +77,23 @@ struct rtentry *rtable_mpath_match(unsig int rtable_mpath_reprio(unsigned int, struct sockaddr *, struct sockaddr *, uint8_t, struct rtentry *); struct rtentry *rtable_mpath_next(struct rtentry *); + +struct rtpaths { +#ifdef ART + struct srp_ref rp_sr; + unsigned long rp_entries[2]; + struct rtlist_iter rp_i; +#else + struct rtentry *rp_rt; +#endif +}; + +struct rtentry *rtpaths_match(struct rtpaths *, + unsigned int, struct sockaddr *); +struct rtentry *rtpaths_lookup(struct rtpaths *, + unsigned int, struct sockaddr *, + struct sockaddr *); +struct rtentry *rtpaths_next(struct rtpaths *); +void rtpaths_leave(struct rtpaths *); + #endif /* _NET_RTABLE_H_ */ Index: net/art.c =================================================================== RCS file: /cvs/src/sys/net/art.c,v retrieving revision 1.22 diff -u -p -r1.22 art.c --- net/art.c 19 Jul 2016 10:51:44 -0000 1.22 +++ net/art.c 29 Aug 2016 11:47:39 -0000 @@ -936,7 +952,7 @@ art_get(struct sockaddr *dst, uint8_t pl an->an_dst = dst; an->an_plen = plen; - SRPL_INIT(&an->an_rtlist); + srp_init(&an->an_route); return (an); } @@ -944,8 +960,6 @@ art_get(struct sockaddr *dst, uint8_t pl void art_put(struct art_node *an) { - KASSERT(SRPL_EMPTY_LOCKED(&an->an_rtlist)); - mtx_enter(&art_node_gc_mtx); an->an_gc = art_node_gc_list; art_node_gc_list = an; Index: net/art.h =================================================================== RCS file: /cvs/src/sys/net/art.h,v retrieving revision 1.14 diff -u -p -r1.14 art.h --- net/art.h 14 Jun 2016 04:42:02 -0000 1.14 +++ net/art.h 29 Aug 2016 11:47:39 -0000 @@ -43,7 +43,7 @@ struct rtentry; * A node is the internal representation of a route entry. 
*/ struct art_node { - SRPL_HEAD(, rtentry) an_rtlist; /* Route related to this node */ + struct srp an_route; /* Route related to this node */ union { struct sockaddr *an__dst; /* Destination address (key) */ struct art_node *an__gc; /* Entry on GC list */ Index: net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.437 diff -u -p -r1.437 if.c --- net/if.c 11 Aug 2016 00:58:22 -0000 1.437 +++ net/if.c 29 Aug 2016 11:47:39 -0000 @@ -2403,7 +2406,8 @@ if_group_egress_build(void) #ifdef INET6 struct sockaddr_in6 sa_in6; #endif - struct rtentry *rt0, *rt; + struct rtpaths rp; + struct rtentry *rt; TAILQ_FOREACH(ifg, &ifg_head, ifg_next) if (!strcmp(ifg->ifg_group, IFG_EGRESS)) @@ -2416,44 +2420,27 @@ if_group_egress_build(void) bzero(&sa_in, sizeof(sa_in)); sa_in.sin_len = sizeof(sa_in); sa_in.sin_family = AF_INET; - rt0 = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY); - if (rt0 != NULL) { - rt = rt0; - do { - ifp = if_get(rt->rt_ifidx); - if (ifp != NULL) { - if_addgroup(ifp, IFG_EGRESS); - if_put(ifp); - } -#ifndef SMALL_KERNEL - rt = rtable_mpath_next(rt); -#else - rt = NULL; -#endif - } while (rt != NULL); + for (rt = rtpaths_lookup(&rp, 0, sintosa(&sa_in), sintosa(&sa_in)); + rt != NULL; rt = rtpaths_next(&rp)) { + ifp = if_get(rt->rt_ifidx); + if (ifp != NULL) { + if_addgroup(ifp, IFG_EGRESS); + if_put(ifp); + } } - rtfree(rt0); + rtpaths_leave(&rp); #ifdef INET6 bcopy(&sa6_any, &sa_in6, sizeof(sa_in6)); - rt0 = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL, - RTP_ANY); - if (rt0 != NULL) { - rt = rt0; - do { - ifp = if_get(rt->rt_ifidx); - if (ifp != NULL) { - if_addgroup(ifp, IFG_EGRESS); - if_put(ifp); - } -#ifndef SMALL_KERNEL - rt = rtable_mpath_next(rt); -#else - rt = NULL; -#endif - } while (rt != NULL); + for (rt = rtpaths_lookup(&rp, 0, sin6tosa(&sa_in6), sin6tosa(&sa_in6)); + rt != NULL; rt = rtpaths_next(&rp)) { + ifp = if_get(rt->rt_ifidx); + if (ifp != NULL) { + if_addgroup(ifp, IFG_EGRESS); + if_put(ifp); + } } - rtfree(rt0); + rtpaths_leave(&rp); #endif /* INET6 */ return (0); Index: net/pf.c =================================================================== RCS file: /cvs/src/sys/net/pf.c,v retrieving revision 1.982 diff -u -p -r1.982 pf.c --- net/pf.c 20 Aug 2016 08:34:30 -0000 1.982 +++ net/pf.c 29 Aug 2016 11:47:40 -0000 @@ -5603,7 +5603,8 @@ pf_routable(struct pf_addr *addr, sa_fam #ifdef INET6 struct sockaddr_in6 *dst6; #endif /* INET6 */ - struct rtentry *rt, *rt0 = NULL; + struct rtpaths rp; + struct rtentry *rt; check_mpath = 0; memset(&ss, 0, sizeof(ss)); @@ -5638,8 +5639,8 @@ pf_routable(struct pf_addr *addr, sa_fam if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) goto out; - rt0 = rtalloc((struct sockaddr *)&ss, 0, rtableid); - if (rt0 != NULL) { + rt = rtpaths_match(&rp, rtableid, (struct sockaddr *)&ss); + if (rt != NULL) { /* No interface given, this is a no-route check */ if (kif == NULL) goto out; @@ -5651,7 +5652,6 @@ pf_routable(struct pf_addr *addr, sa_fam /* Perform uRPF check if passed input interface */ ret = 0; - rt = rt0; do { if (rt->rt_ifidx == kif->pfik_ifp->if_index) { ret = 1; @@ -5667,16 +5667,13 @@ pf_routable(struct pf_addr *addr, sa_fam #endif /* NCARP */ } -#ifndef SMALL_KERNEL - rt = rtable_mpath_next(rt); -#else - rt = NULL; -#endif /* SMALL_KERNEL */ - } while (check_mpath == 1 && rt != NULL && ret == 0); + rt = rtpaths_next(&rp); + } while (ret == 0 && check_mpath == 1 && + (rt = rtpaths_next(&rp)) != NULL); } else 
ret = 0; out: - rtfree(rt0); + rtpaths_leave(&rp); return (ret); } Index: netinet/if_ether.c =================================================================== RCS file: /cvs/src/sys/netinet/if_ether.c,v retrieving revision 1.221 diff -u -p -r1.221 if_ether.c --- netinet/if_ether.c 22 Aug 2016 16:01:52 -0000 1.221 +++ netinet/if_ether.c 29 Aug 2016 11:47:40 -0000 @@ -81,7 +81,8 @@ int arpt_down = 20; /* once declared do void arpinvalidate(struct rtentry *); void arptfree(struct rtentry *); void arptimer(void *); -struct rtentry *arplookup(struct in_addr *, int, int, unsigned int); +struct rtentry *arplookup(struct in_addr *, int, unsigned int); +struct rtentry *arpproxylookup(struct in_addr *, unsigned int); void in_arpinput(struct ifnet *, struct mbuf *); void in_revarpinput(struct ifnet *, struct mbuf *); int arpcache(struct ifnet *, struct ether_arp *, struct rtentry *); @@ -528,7 +529,7 @@ in_arpinput(struct ifnet *ifp, struct mb #endif /* Do we have an ARP cache for the sender? Create if we are target. */ - rt = arplookup(&isaddr, target, 0, rdomain); + rt = arplookup(&isaddr, target, rdomain); /* Check sender against our interface addresses. */ if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) && @@ -555,7 +556,7 @@ in_arpinput(struct ifnet *ifp, struct mb eaddr = LLADDR(ifp->if_sadl); } else { rtfree(rt); - rt = arplookup(&itaddr, 0, SIN_PROXY, rdomain); + rt = arpproxylookup(&itaddr, rdomain); /* * Protect from possible duplicates, only owner * should respond @@ -708,7 +709,7 @@ arptfree(struct rtentry *rt) * Lookup or enter a new address in arptab. */ struct rtentry * -arplookup(struct in_addr *inp, int create, int proxy, u_int tableid) +arplookup(struct in_addr *inp, int create, u_int tableid) { struct rtentry *rt; struct sockaddr_inarp sin; @@ -718,7 +719,6 @@ arplookup(struct in_addr *inp, int creat sin.sin_len = sizeof(sin); sin.sin_family = AF_INET; sin.sin_addr.s_addr = inp->s_addr; - sin.sin_other = proxy ? SIN_PROXY : 0; flags = (create) ? RT_RESOLVE : 0; rt = rtalloc((struct sockaddr *)&sin, flags, tableid); @@ -729,22 +729,30 @@ arplookup(struct in_addr *inp, int creat return (NULL); } - if (proxy && !ISSET(rt->rt_flags, RTF_ANNOUNCE)) { - struct rtentry *mrt = NULL; -#if defined(ART) && !defined(SMALL_KERNEL) - mrt = rt; - KERNEL_LOCK(); - while ((mrt = rtable_mpath_next(mrt)) != NULL) { - if (ISSET(mrt->rt_flags, RTF_ANNOUNCE)) { - rtref(mrt); - break; - } + return (rt); +} + +struct rtentry * +arpproxylookup(struct in_addr *inp, unsigned int tableid) +{ + struct rtpaths rp; + struct rtentry *rt; + struct sockaddr_inarp sin; + + memset(&sin, 0, sizeof(sin)); + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = inp->s_addr; + sin.sin_other = SIN_PROXY; + + for (rt = rtpaths_match(&rp, tableid, (struct sockaddr *)&sin); + rt != NULL; rt = rtpaths_next(&rp)) { + if (ISSET(rt->rt_flags, RTF_ANNOUNCE)) { + rtref(rt); + break; } - KERNEL_UNLOCK(); -#endif /* ART && !SMALL_KERNEL */ - rtfree(rt); - return (mrt); } + rtpaths_leave(&rp); return (rt); } @@ -760,7 +768,7 @@ arpproxy(struct in_addr in, unsigned int struct ifnet *ifp; int found = 0; - rt = arplookup(&in, 0, SIN_PROXY, rtableid); + rt = arpproxylookup(&in, rtableid); if (!rtisvalid(rt)) { rtfree(rt); return (0);