Index: art.c =================================================================== RCS file: /cvs/src/sys/net/art.c,v diff -u -p -r1.31 art.c --- art.c 11 Nov 2023 12:17:50 -0000 1.31 +++ art.c 12 Jun 2025 12:47:32 -0000 @@ -22,6 +22,37 @@ * * Yoichi Hariguchi paper can be found at: * http://www.hariguchi.org/art/art.pdf + * + * This implementation is tweaked to minimise pointer traversal during lookups. + * + * +------------------+ + * | struct art | + * +------------------+ NULL + * | ^ + * | heap entry | at_parent + * v | + * +------------------+ heap[0] +------------------+ + * | heap |----------->| struct art_table | + * +------------------+ +------------------+ + * | ^ + * | heap entry | at_parent + * v | + * +------------------+ heap[0] +------------------+ + * | heap |----------->| struct art_table | + * +------------------+ +------------------+ + * | + * | node entry + * v + * +------------------+ + * | node | + * +------------------+ + * | + * | an_value + * v + * "user" data + * + * Traversal during lookups only accesses the heaps; it avoids + * following pointers to or through the art_table structures. */ #ifndef _KERNEL @@ -33,43 +64,96 @@ #include #include #include +#include #endif #include -int art_bindex(struct art_table *, const uint8_t *, int); -void art_allot(struct art_table *at, int, struct art_node *, - struct art_node *); -struct art_table *art_table_get(struct art_root *, struct art_table *, - int); -struct art_table *art_table_put(struct art_root *, struct art_table *); -struct art_node *art_table_insert(struct art_root *, struct art_table *, +/* + * Allotment Table. + */ +struct art_table { + art_heap_entry *at_heap; + struct art_table *at_parent; /* Parent table */ + + unsigned int at_index; /* Index in the parent table */ + unsigned int at_minfringe; /* Index that fringe begins */ + + unsigned int at_level; /* Level of the table */ + unsigned int at_bits; /* Stride length of the table */ + unsigned int at_offset; /* Sum of parents' stride len */ + + unsigned int at_refcnt; +}; + +static void art_allot(struct art_table *at, unsigned int, + art_heap_entry, art_heap_entry); +struct art_table *art_table_get(struct art *, struct art_table *, + unsigned int); +struct art_table *art_table_put(struct art *, struct art_table *); +struct art_node *art_table_insert(struct art *, struct art_table *, int, struct art_node *); -struct art_node *art_table_delete(struct art_root *, struct art_table *, +struct art_node *art_table_delete(struct art *, struct art_table *, int, struct art_node *); -struct art_table *art_table_ref(struct art_root *, struct art_table *); -int art_table_free(struct art_root *, struct art_table *); -int art_table_walk(struct art_root *, struct art_table *, - int (*f)(struct art_node *, void *), void *); -int art_walk_apply(struct art_root *, - struct art_node *, struct art_node *, - int (*f)(struct art_node *, void *), void *); +struct art_table *art_table_ref(struct art *, struct art_table *); +int art_table_free(struct art *, struct art_table *); void art_table_gc(void *); void art_gc(void *); -struct pool an_pool, at_pool, at_heap_4_pool, at_heap_8_pool; +static struct pool an_pool, at_pool, at_heap_4_pool, at_heap_8_pool; -struct art_table *art_table_gc_list = NULL; -struct mutex art_table_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); -struct task art_table_gc_task =
TASK_INITIALIZER(art_table_gc, NULL); -struct art_node *art_node_gc_list = NULL; -struct mutex art_node_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); -struct task art_node_gc_task = TASK_INITIALIZER(art_gc, NULL); +static struct art_node *art_node_gc_list = NULL; +static struct mutex art_node_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); +static struct task art_node_gc_task = TASK_INITIALIZER(art_gc, NULL); + +#define ART_HEAP_IDX_TABLE 0 +#define ART_HEAP_IDX_DEFAULT 1 + +#define AT_HEAPSIZE(bits) ((1 << ((bits) + 1)) * sizeof(art_heap_entry)) + +static inline struct art_table * +art_heap_to_table(art_heap_entry *heap) +{ + return ((struct art_table *)heap[ART_HEAP_IDX_TABLE]); +} + +static inline int +art_heap_entry_is_node(art_heap_entry ahe) +{ + return (!ISSET(ahe, 1)); +} + +static inline struct art_node * +art_heap_entry_to_node(art_heap_entry ahe) +{ + return ((struct art_node *)ahe); +} + +static inline art_heap_entry * +art_heap_entry_to_heap(art_heap_entry ahe) +{ + return ((art_heap_entry *)(ahe & ~1ULL)); +} + +static inline art_heap_entry +art_node_to_heap_entry(struct art_node *an) +{ + return ((uintptr_t)an); +} + +static inline art_heap_entry +art_heap_to_heap_entry(art_heap_entry *heap) +{ + return ((uintptr_t)heap | 1ULL); +} void -art_init(void) +art_boot(void) { pool_init(&an_pool, sizeof(struct art_node), 0, IPL_SOFTNET, 0, "art_node", NULL); @@ -84,56 +168,74 @@ art_init(void) /* * Per routing table initialization API function. */ -struct art_root * -art_alloc(unsigned int rtableid, unsigned int alen, unsigned int off) -{ - struct art_root *ar; - int i; - ar = malloc(sizeof(*ar), M_RTABLE, M_NOWAIT|M_ZERO); - if (ar == NULL) - return (NULL); +static const unsigned int art_plen32_levels[] = { + 8, 4, 4, 4, 4, 4, 4 +}; + +static const unsigned int art_plen128_levels[] = { + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 +}; + +static const unsigned int art_plen20_levels[] = { + 4, 4, 4, 4, 4 +}; + +void +art_init(struct art *art, unsigned int alen) +{ + const unsigned int *levels; + unsigned int nlevels; +#ifdef DIAGNOSTIC + unsigned int i; + unsigned int bits = 0; +#endif switch (alen) { case 32: - ar->ar_alen = 32; - ar->ar_nlvl = 7; - ar->ar_bits[0] = 8; - for (i = 1; i < ar->ar_nlvl; i++) - ar->ar_bits[i] = 4; + levels = art_plen32_levels; + nlevels = nitems(art_plen32_levels); break; case 128: - ar->ar_alen = 128; - ar->ar_nlvl = 32; - for (i = 0; i < ar->ar_nlvl; i++) - ar->ar_bits[i] = 4; + levels = art_plen128_levels; + nlevels = nitems(art_plen128_levels); + break; + case 20: + levels = art_plen20_levels; + nlevels = nitems(art_plen20_levels); break; default: - printf("%s: incorrect address length %u\n", __func__, alen); - free(ar, M_RTABLE, sizeof(*ar)); - return (NULL); + panic("no configuration for alen %u", alen); + /* NOTREACHED */ } - ar->ar_off = off; - rw_init(&ar->ar_lock, "art"); +#ifdef DIAGNOSTIC + for (i = 0; i < nlevels; i++) + bits += levels[i]; + + if (alen != bits) + panic("sum of levels %u != address len %u", bits, alen); +#endif /* DIAGNOSTIC */ - return (ar); + art->art_root = 0; + art->art_levels = levels; + art->art_nlevels = nlevels; + art->art_alen = alen; } -/* - * Return 1 if ``old'' and ``new`` are identical, 0 otherwise. 
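The art_heap_entry helpers near the top of art.c rely on pointer alignment: the low bit of a heap entry is used as a tag, clear for a leaf (a struct art_node pointer, NULL included) and set for a pointer to a child table's heap. A minimal userland sketch of the same encoding (standalone types for illustration, not the kernel code):

#include <assert.h>
#include <stdint.h>

typedef uintptr_t art_heap_entry;

int
main(void)
{
	static art_heap_entry child_heap[8];	/* stands in for a subtable heap */
	static int node;			/* stands in for a struct art_node */

	/* leaf entries are stored as-is, so the low bit stays clear */
	art_heap_entry leaf = (uintptr_t)&node;
	assert((leaf & 1) == 0);

	/* child heaps are stored with the low bit set */
	art_heap_entry sub = (uintptr_t)child_heap | 1UL;
	assert((sub & 1) == 1);

	/* masking the tag off recovers the heap pointer */
	assert((art_heap_entry *)(sub & ~1UL) == child_heap);

	return (0);
}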
- */ -static inline int -art_check_duplicate(struct art_root *ar, struct art_node *old, - struct art_node *new) +struct art * +art_alloc(unsigned int alen) { - if (old == NULL) - return (0); + struct art *art; - if (old->an_plen == new->an_plen) - return (1); + art = malloc(sizeof(*art), M_RTABLE, M_NOWAIT|M_ZERO); + if (art == NULL) + return (NULL); - return (0); + art_init(art, alen); + + return (art); } /* @@ -148,30 +250,31 @@ art_check_duplicate(struct art_root *ar, * 8bit-long tables, there's a maximum of 4 base indexes if the * prefix length is > 24. */ -int -art_bindex(struct art_table *at, const uint8_t *addr, int plen) +static unsigned int __pure +art_bindex(unsigned int offset, unsigned int bits, + const uint8_t *addr, unsigned int plen) { - uint8_t boff, bend; + unsigned int boff, bend; uint32_t k; - if (plen < at->at_offset || plen > (at->at_offset + at->at_bits)) - return (-1); + KASSERT(plen >= offset); + KASSERT(plen <= (offset + bits)); /* * We are only interested in the part of the prefix length * corresponding to the range of this table. */ - plen -= at->at_offset; + plen -= offset; /* * Jump to the first byte of the address containing bits * covered by this table. */ - addr += (at->at_offset / 8); + addr += (offset / 8); /* ``at'' covers the bit range between ``boff'' & ``bend''. */ - boff = (at->at_offset % 8); - bend = (at->at_bits + boff); + boff = (offset % 8); + bend = (bits + boff); KASSERT(bend <= 32); @@ -188,25 +291,13 @@ art_bindex(struct art_table *at, const u k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8); k |= addr[1] >> (16 - bend); } else { - k = (addr[0] >> (8 - bend)) & ((1 << at->at_bits) - 1); + k = (addr[0] >> (8 - bend)) & ((1 << bits) - 1); } /* * Single level base index formula: */ - return ((k >> (at->at_bits - plen)) + (1 << plen)); -} - -/* - * Single level lookup function. - * - * Return the fringe index of the part of ``addr'' - * corresponding to the range covered by the table ``at''. - */ -static inline int -art_findex(struct art_table *at, const uint8_t *addr) -{ - return art_bindex(at, addr, (at->at_offset + at->at_bits)); + return ((k >> (bits - plen)) + (1 << plen)); } /* @@ -215,60 +306,94 @@ art_findex(struct art_table *at, const u * Return the best existing match for a destination. */ struct art_node * -art_match(struct art_root *ar, const void *addr, struct srp_ref *nsr) +art_match(struct art *art, const void *addr) { - struct srp_ref dsr, ndsr; - void *entry; - struct art_table *at; - struct art_node *dflt, *ndflt; - int j; - - entry = srp_enter(nsr, &ar->ar_root); - at = entry; + unsigned int offset = 0; + unsigned int level = 0; + art_heap_entry *heap; + art_heap_entry ahe, dahe = 0; + struct art_node *an; + int j; - if (at == NULL) - goto done; + SMR_ASSERT_CRITICAL(); - /* - * Remember the default route of each table we visit in case - * we do not find a better matching route. - */ - dflt = srp_enter(&dsr, &at->at_default); + heap = SMR_PTR_GET(&art->art_root); + if (heap == NULL) + return (NULL); /* * Iterate until we find a leaf. */ - while (1) { + for (;;) { + unsigned int bits = art->art_levels[level]; + unsigned int p = offset + bits; + + /* + * Remember the default route of each table we visit in case + * we do not find a better matching route. + */ + ahe = SMR_PTR_GET(&heap[ART_HEAP_IDX_DEFAULT]); + if (ahe != 0) + dahe = ahe; + /* Do a single level route lookup. 
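The single level base index formula in art_bindex() above is easiest to check with concrete numbers. A standalone sketch with assumed values: for a top level table with offset 0 and an 8 bit stride, the prefix 192.0.0.0/6 gives k = 0xc0 (the top 8 address bits), so its base index is (0xc0 >> (8 - 6)) + (1 << 6) = 48 + 64 = 112; a prefix consuming the whole stride lands on a fringe index, here (0xc0 >> 0) + (1 << 8) = 448.

#include <assert.h>
#include <stdint.h>

/* the single level base index formula from art_bindex() above */
static unsigned int
bindex(unsigned int bits, uint32_t k, unsigned int plen)
{
	return ((k >> (bits - plen)) + (1 << plen));
}

int
main(void)
{
	assert(bindex(8, 0xc0, 6) == 112);	/* 192.0.0.0/6 */
	assert(bindex(8, 0xc0, 8) == 448);	/* /8 is a fringe index */
	return (0);
}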
*/ - j = art_findex(at, addr); - entry = srp_follow(nsr, &at->at_heap[j].node); + j = art_bindex(offset, bits, addr, p); + ahe = SMR_PTR_GET(&heap[j]); /* If this is a leaf (NULL is a leaf) we're done. */ - if (ISLEAF(entry)) + if (art_heap_entry_is_node(ahe)) break; - at = SUBTABLE(entry); + heap = art_heap_entry_to_heap(ahe); + offset = p; + level++; + + KASSERT(level < art->art_nlevels); + } + + an = art_heap_entry_to_node(ahe); + if (an != NULL) + return (an); + + return art_heap_entry_to_node(dahe); +} + +static int +art_node_check(const struct art_node *an, + const uint8_t *addr, unsigned int plen) +{ + const uint32_t *wan = (const uint32_t *)an->an_addr; + const uint32_t *waddr = (const uint32_t *)addr; + unsigned int i = 0; + unsigned int shift; + + if (an->an_plen != plen) + return (0); + + while (plen >= 32) { + if (wan[i] != waddr[i]) + return (0); + + i++; + plen -= 32; + } + if (plen == 0) + return (1); + + i *= 4; + while (plen >= 8) { + if (an->an_addr[i] != addr[i]) + return (0); + + i++; + plen -= 8; + } + if (plen == 0) + return (1); - ndflt = srp_enter(&ndsr, &at->at_default); - if (ndflt != NULL) { - srp_leave(&dsr); - dsr = ndsr; - dflt = ndflt; - } else - srp_leave(&ndsr); - } - - if (entry == NULL) { - srp_leave(nsr); - *nsr = dsr; - KASSERT(ISLEAF(dflt)); - return (dflt); - } - - srp_leave(&dsr); -done: - KASSERT(ISLEAF(entry)); - return (entry); + shift = 8 - plen; + + return ((an->an_addr[i] >> shift) == (addr[i] >> shift)); } /* @@ -278,24 +403,28 @@ done: * it does not exist. */ struct art_node * -art_lookup(struct art_root *ar, const void *addr, int plen, struct srp_ref *nsr) +art_lookup(struct art *art, const void *addr, unsigned int plen) { - void *entry; - struct art_table *at; - int i, j; + unsigned int offset = 0; + unsigned int bits, p; + unsigned int level = 0; + art_heap_entry *heap; + art_heap_entry ahe; + struct art_node *an; + unsigned int i, j; - KASSERT(plen >= 0 && plen <= ar->ar_alen); + KASSERT(plen <= art->art_alen); - entry = srp_enter(nsr, &ar->ar_root); - at = entry; + SMR_ASSERT_CRITICAL(); - if (at == NULL) - goto done; + heap = SMR_PTR_GET(&art->art_root); + if (heap == NULL) + return (NULL); /* Default route */ if (plen == 0) { - entry = srp_follow(nsr, &at->at_default); - goto done; + ahe = SMR_PTR_GET(&heap[ART_HEAP_IDX_DEFAULT]); + return art_heap_entry_to_node(ahe); } /* @@ -303,31 +432,47 @@ art_lookup(struct art_root *ar, const vo * the stride length at this level the entry must * be in the current table. */ - while (plen > (at->at_offset + at->at_bits)) { + for (;;) { + bits = art->art_levels[level]; + p = offset + bits; + if (plen <= p) + break; + /* Do a single level route lookup. 
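art_match and art_lookup only assert that the caller is inside an SMR critical section; entering and leaving it is the caller's job, as wg_aip_lookup does later in this diff. A sketch of a read-side caller (my_lookup and its void * result are assumptions for illustration):

static void *
my_lookup(struct art *art, const uint8_t *addr)
{
	struct art_node *an;
	void *value = NULL;

	smr_read_enter();
	an = art_match(art, addr);
	if (an != NULL) {
		/*
		 * an is only guaranteed to stay alive inside the
		 * critical section; copy or reference whatever is
		 * needed before smr_read_leave().
		 */
		value = SMR_PTR_GET(&an->an_value);
	}
	smr_read_leave();

	return (value);
}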
*/ - j = art_findex(at, addr); - entry = srp_follow(nsr, &at->at_heap[j].node); + j = art_bindex(offset, bits, addr, p); + ahe = SMR_PTR_GET(&heap[j]); /* A leaf is a match, but not a perfect one, or NULL */ - if (ISLEAF(entry)) + if (art_heap_entry_is_node(ahe)) return (NULL); - at = SUBTABLE(entry); + heap = art_heap_entry_to_heap(ahe); + offset = p; + level++; + + KASSERT(level < art->art_nlevels); } - i = art_bindex(at, addr, plen); - if (i == -1) - return (NULL); + i = art_bindex(offset, bits, addr, plen); + ahe = SMR_PTR_GET(&heap[i]); + if (!art_heap_entry_is_node(ahe)) { + heap = art_heap_entry_to_heap(ahe); + ahe = SMR_PTR_GET(&heap[ART_HEAP_IDX_DEFAULT]); + } - entry = srp_follow(nsr, &at->at_heap[i].node); - if (!ISLEAF(entry)) - entry = srp_follow(nsr, &SUBTABLE(entry)->at_default); - -done: - KASSERT(ISLEAF(entry)); - return (entry); + /* Make sure we've got a perfect match */ + an = art_heap_entry_to_node(ahe); + if (an != NULL && art_node_check(an, addr, plen)) + return (an); + + return (NULL); } +int +art_is_empty(struct art *art) +{ + return (SMR_PTR_GET_LOCKED(&art->art_root) == NULL); +} /* * Insertion API function. @@ -336,32 +481,38 @@ done: * same destination/mask pair is already present. */ struct art_node * -art_insert(struct art_root *ar, struct art_node *an, const void *addr, int plen) +art_insert(struct art *art, struct art_node *an) { - struct art_table *at, *child; - struct art_node *node; - int i, j; - - rw_assert_wrlock(&ar->ar_lock); - KASSERT(plen >= 0 && plen <= ar->ar_alen); - - at = srp_get_locked(&ar->ar_root); - if (at == NULL) { - at = art_table_get(ar, NULL, -1); + unsigned int p; + art_heap_entry ahe, oahe, *ahep; + art_heap_entry *heap; + struct art_node *oan; + struct art_table *at; + unsigned int i, j; + + KASSERT(an->an_plen <= art->art_alen); + + heap = SMR_PTR_GET_LOCKED(&art->art_root); + if (heap == NULL) { + at = art_table_get(art, NULL, -1); if (at == NULL) return (NULL); - srp_swap_locked(&ar->ar_root, at); - } + heap = at->at_heap; + SMR_PTR_SET_LOCKED(&art->art_root, heap); + } else + at = art_heap_to_table(heap); /* Default route */ - if (plen == 0) { - node = srp_get_locked(&at->at_default); - if (node != NULL) - return (node); + if (an->an_plen == 0) { + ahep = &heap[ART_HEAP_IDX_DEFAULT]; + oahe = SMR_PTR_GET_LOCKED(ahep); + oan = art_heap_entry_to_node(oahe); + if (oan != NULL) + return (oan); - art_table_ref(ar, at); - srp_swap_locked(&at->at_default, an); + art_table_ref(art, at); + SMR_PTR_SET_LOCKED(ahep, art_node_to_heap_entry(an)); return (an); } @@ -370,10 +521,11 @@ art_insert(struct art_root *ar, struct a * the stride length at this level the entry must * be in the current table. */ - while (plen > (at->at_offset + at->at_bits)) { + while ((p = at->at_offset + at->at_bits) < an->an_plen) { /* Do a single level route lookup. */ - j = art_findex(at, addr); - node = srp_get_locked(&at->at_heap[j].node); + j = art_bindex(at->at_offset, at->at_bits, an->an_addr, p); + ahep = &heap[j]; + ahe = SMR_PTR_GET_LOCKED(ahep); /* * If the node corresponding to the fringe index is @@ -381,84 +533,86 @@ art_insert(struct art_root *ar, struct a * entry of this node will then become the default * route of the subtable. 
*/ - if (ISLEAF(node)) { - child = art_table_get(ar, at, j); + if (art_heap_entry_is_node(ahe)) { + struct art_table *child = art_table_get(art, at, j); if (child == NULL) return (NULL); - art_table_ref(ar, at); - srp_swap_locked(&at->at_heap[j].node, ASNODE(child)); + art_table_ref(art, at); + at = child; - } else - at = SUBTABLE(node); + heap = at->at_heap; + SMR_PTR_SET_LOCKED(&heap[ART_HEAP_IDX_DEFAULT], ahe); + SMR_PTR_SET_LOCKED(ahep, art_heap_to_heap_entry(heap)); + } else { + heap = art_heap_entry_to_heap(ahe); + at = art_heap_to_table(heap); + } } - i = art_bindex(at, addr, plen); - if (i == -1) - return (NULL); - - return (art_table_insert(ar, at, i, an)); -} - -/* - * Single level insertion. - */ -struct art_node * -art_table_insert(struct art_root *ar, struct art_table *at, int i, - struct art_node *an) -{ - struct art_node *prev, *node; - - node = srp_get_locked(&at->at_heap[i].node); - if (!ISLEAF(node)) - prev = srp_get_locked(&SUBTABLE(node)->at_default); - else - prev = node; - - if (art_check_duplicate(ar, prev, an)) - return (prev); + i = art_bindex(at->at_offset, at->at_bits, an->an_addr, an->an_plen); + ahep = &heap[i]; + oahe = SMR_PTR_GET_LOCKED(ahep); + if (!art_heap_entry_is_node(oahe)) { + heap = art_heap_entry_to_heap(oahe); + ahep = &heap[ART_HEAP_IDX_DEFAULT]; + oahe = SMR_PTR_GET_LOCKED(ahep); + } - art_table_ref(ar, at); + /* Check if there's an existing node */ + oan = art_heap_entry_to_node(oahe); + if (oan != NULL && art_node_check(oan, an->an_addr, an->an_plen)) + return (oan); /* * If the index `i' of the route that we are inserting is not * a fringe index, we need to allot this new route pointer to * all the corresponding fringe indices. */ + art_table_ref(art, at); + ahe = art_node_to_heap_entry(an); if (i < at->at_minfringe) - art_allot(at, i, prev, an); - else if (!ISLEAF(node)) - srp_swap_locked(&SUBTABLE(node)->at_default, an); + art_allot(at, i, oahe, ahe); else - srp_swap_locked(&at->at_heap[i].node, an); + SMR_PTR_SET_LOCKED(ahep, ahe); return (an); } - /* * Deletion API function. */ struct art_node * -art_delete(struct art_root *ar, struct art_node *an, const void *addr, int plen) +art_delete(struct art *art, const void *addr, unsigned int plen) { + unsigned int p; + art_heap_entry *heap; struct art_table *at; - struct art_node *node; - int i, j; + art_heap_entry ahe, pahe, *ahep; + struct art_node *an; + unsigned int i, j; - rw_assert_wrlock(&ar->ar_lock); - KASSERT(plen >= 0 && plen <= ar->ar_alen); + KASSERT(plen <= art->art_alen); - at = srp_get_locked(&ar->ar_root); - if (at == NULL) + heap = SMR_PTR_GET_LOCKED(&art->art_root); + if (heap == NULL) return (NULL); + at = art_heap_to_table(heap); + /* Default route */ if (plen == 0) { - node = srp_get_locked(&at->at_default); - srp_swap_locked(&at->at_default, NULL); - art_table_free(ar, at); - return (node); + ahep = &heap[ART_HEAP_IDX_DEFAULT]; + + ahe = SMR_PTR_GET_LOCKED(ahep); + an = art_heap_entry_to_node(ahe); + if (an == NULL) + return (NULL); + + SMR_PTR_SET_LOCKED(ahep, 0); + art_table_free(art, at); + + return (an); } /* @@ -466,53 +620,37 @@ art_delete(struct art_root *ar, struct a * the stride length at this level the entry must * be in the current table. */ - while (plen > (at->at_offset + at->at_bits)) { + while ((p = at->at_offset + at->at_bits) < plen) { /* Do a single level route lookup. 
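A sketch of how a caller drives the insert path above, modelled on wg_aip_add later in this diff; the rwlock, my_insert and the value argument are assumptions. art_insert hands back an existing node when the prefix is already present, in which case the caller disposes of its own copy:

static int
my_insert(struct rwlock *lock, struct art *art,
    const uint8_t *addr, unsigned int plen, void *value)
{
	struct art_node *an, *oan;

	an = art_get(addr, plen);
	if (an == NULL)
		return (ENOMEM);
	an->an_value = value;

	rw_enter_write(lock);
	oan = art_insert(art, an);
	rw_exit_write(lock);

	if (oan != an) {
		/* allocation failure, or the prefix already existed */
		art_put(an);
		return (oan == NULL ? ENOMEM : EEXIST);
	}

	return (0);
}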
*/ - j = art_findex(at, addr); - node = srp_get_locked(&at->at_heap[j].node); + j = art_bindex(at->at_offset, at->at_bits, addr, p); + ahe = SMR_PTR_GET_LOCKED(&heap[j]); /* If this is a leaf, there is no route to delete. */ - if (ISLEAF(node)) + if (art_heap_entry_is_node(ahe)) return (NULL); - at = SUBTABLE(node); + heap = art_heap_entry_to_heap(ahe); + at = art_heap_to_table(heap); } - i = art_bindex(at, addr, plen); - if (i == -1) + i = art_bindex(at->at_offset, at->at_bits, addr, plen); + ahep = &heap[i]; + ahe = SMR_PTR_GET_LOCKED(ahep); + if (!art_heap_entry_is_node(ahe)) { + art_heap_entry *nheap = art_heap_entry_to_heap(ahe); + ahep = &nheap[ART_HEAP_IDX_DEFAULT]; + ahe = SMR_PTR_GET_LOCKED(ahep); + } + + an = art_heap_entry_to_node(ahe); + if (an == NULL) { + /* No route to delete */ return (NULL); - - return (art_table_delete(ar, at, i, an)); -} - -/* - * Single level deletion. - */ -struct art_node * -art_table_delete(struct art_root *ar, struct art_table *at, int i, - struct art_node *an) -{ - struct art_node *next, *node; - -#ifdef DIAGNOSTIC - struct art_node *prev; -#endif - - node = srp_get_locked(&at->at_heap[i].node); -#ifdef DIAGNOSTIC - if (!ISLEAF(node)) - prev = srp_get_locked(&SUBTABLE(node)->at_default); - else - prev = node; - - KASSERT(prev == an); -#endif + } /* Get the next most specific route for the index `i'. */ - if ((i >> 1) > 1) - next = srp_get_locked(&at->at_heap[i >> 1].node); - else - next = NULL; + j = i >> 1; + pahe = (j > 1) ? SMR_PTR_GET_LOCKED(&heap[j]) : 0; /* * If the index `i' of the route that we are removing is not @@ -520,20 +658,18 @@ art_table_delete(struct art_root *ar, st * route pointer to all the corresponding fringe indices. */ if (i < at->at_minfringe) - art_allot(at, i, an, next); - else if (!ISLEAF(node)) - srp_swap_locked(&SUBTABLE(node)->at_default, next); + art_allot(at, i, ahe, pahe); else - srp_swap_locked(&at->at_heap[i].node, next); + SMR_PTR_SET_LOCKED(ahep, pahe); /* We have removed an entry from this table. */ - art_table_free(ar, at); + art_table_free(art, at); return (an); } struct art_table * -art_table_ref(struct art_root *ar, struct art_table *at) +art_table_ref(struct art *art, struct art_table *at) { at->at_refcnt++; return (at); @@ -544,12 +680,11 @@ art_table_rele(struct art_table *at) { if (at == NULL) return (0); - return (--at->at_refcnt == 0); } int -art_table_free(struct art_root *ar, struct art_table *at) +art_table_free(struct art *art, struct art_table *at) { if (art_table_rele(at)) { /* @@ -557,7 +692,7 @@ art_table_free(struct art_root *ar, stru * that are empty. */ do { - at = art_table_put(ar, at); + at = art_table_put(art, at); } while (art_table_rele(at)); return (1); @@ -569,116 +704,187 @@ art_table_free(struct art_root *ar, stru /* * Iteration API function. */ -int -art_walk(struct art_root *ar, int (*f)(struct art_node *, void *), void *arg) + +static struct art_node * +art_iter_descend(struct art_iter *ai, art_heap_entry *heap, + art_heap_entry pahe) { - struct srp_ref sr; - struct art_table *at; - struct art_node *node; - int error = 0; + struct art_table *at; + art_heap_entry ahe; - rw_enter_write(&ar->ar_lock); - at = srp_get_locked(&ar->ar_root); - if (at != NULL) { - art_table_ref(ar, at); + at = art_heap_to_table(heap); + ai->ai_table = art_table_ref(ai->ai_art, at); - /* - * The default route should be processed here because the root - * table does not have a parent. 
*/ - node = srp_enter(&sr, &at->at_default); - error = art_walk_apply(ar, node, NULL, f, arg); - srp_leave(&sr); + /* + * Start looking at non-fringe nodes. + */ + ai->ai_j = 1; + + /* + * The default route (index 1) is processed by the + * parent table (where it belongs) otherwise it could + * be processed more than once. + */ + ai->ai_i = 2; + + /* + * Process the default route now. + */ + ahe = SMR_PTR_GET_LOCKED(&heap[ART_HEAP_IDX_DEFAULT]); + if (ahe != 0 && ahe != pahe) + return (art_heap_entry_to_node(ahe)); + + /* + * Tell the caller to proceed with art_iter_next. + */ + return (NULL); +} + +struct art_node * +art_iter_open(struct art *art, struct art_iter *ai) +{ + art_heap_entry *heap = SMR_PTR_GET(&art->art_root); + struct art_node *an; - if (error == 0) - error = art_table_walk(ar, at, f, arg); + ai->ai_art = art; - art_table_free(ar, at); + if (heap == NULL) { + /* empty, we're already done */ + ai->ai_table = NULL; + return (NULL); } - rw_exit_write(&ar->ar_lock); - return (error); + an = art_iter_descend(ai, heap, 0); + if (an != NULL) + return (an); /* default route */ + + return (art_iter_next(ai)); } -int -art_table_walk(struct art_root *ar, struct art_table *at, - int (*f)(struct art_node *, void *), void *arg) +struct art_node * +art_iter_next(struct art_iter *ai) { - struct srp_ref sr; - struct art_node *node, *next; - struct art_table *nat; - int i, j, error = 0; - uint32_t maxfringe = (at->at_minfringe << 1); + struct art_table *at = ai->ai_table; + art_heap_entry *heap = at->at_heap; + art_heap_entry ahe, pahe; + unsigned int i; +descend: /* * Iterate non-fringe nodes in ``natural'' order. */ - for (j = 1; j < at->at_minfringe; j += 2) { - /* - * The default route (index 1) is processed by the - * parent table (where it belongs) otherwise it could - * be processed more than once. - */ - for (i = max(j, 2); i < at->at_minfringe; i <<= 1) { - next = srp_get_locked(&at->at_heap[i >> 1].node); - - node = srp_enter(&sr, &at->at_heap[i].node); - error = art_walk_apply(ar, node, next, f, arg); - srp_leave(&sr); + if (ai->ai_j < at->at_minfringe) { + for (;;) { + while ((i = ai->ai_i) < at->at_minfringe) { + ai->ai_i = i << 1; + + pahe = SMR_PTR_GET_LOCKED(&heap[i >> 1]); + ahe = SMR_PTR_GET_LOCKED(&heap[i]); + if (ahe != 0 && ahe != pahe) + return (art_heap_entry_to_node(ahe)); + } - if (error != 0) - return (error); + ai->ai_j += 2; + if (ai->ai_j < at->at_minfringe) + ai->ai_i = ai->ai_j; + else { + /* Set up the fringe loop */ + ai->ai_i = at->at_minfringe; + break; + } } } /* - * Iterate fringe nodes. + * Descendant tables are only found on fringe nodes, and + * they record where they were placed in their parent. This + * allows the iterator to know where to resume traversal when + * it ascends back to the parent table. By keeping the table + * refs when going down the tree, the topology is preserved + * even if all the nodes are removed. */ - for (i = at->at_minfringe; i < maxfringe; i++) { - next = srp_get_locked(&at->at_heap[i >> 1].node); - node = srp_enter(&sr, &at->at_heap[i].node); - if (!ISLEAF(node)) { - nat = art_table_ref(ar, SUBTABLE(node)); - node = srp_follow(&sr, &nat->at_default); - } else - nat = NULL; + for (;;) { + unsigned int maxfringe = at->at_minfringe << 1; + struct art_table *parent; - error = art_walk_apply(ar, node, next, f, arg); - srp_leave(&sr); + /* + * Iterate fringe nodes.
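A sketch of consuming the iterator built on art_iter_descend above (my_count is an assumed example). ART_FOREACH, defined in art.h below, wraps art_iter_open and art_iter_next; the caller must stay serialised against modifications for the whole loop, and must call art_iter_close if it stops early so the table references taken during descent are dropped:

static int
my_count(struct art *art)
{
	struct art_iter ai;
	struct art_node *an;
	int n = 0;

	ART_FOREACH(an, art, &ai)
		n++;

	/* the iterator has already cleaned up once it runs out */
	return (n);
}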
+ */ + while ((i = ai->ai_i) < maxfringe) { + ai->ai_i = i + 1; - if (error != 0) { - art_table_free(ar, nat); - return (error); + pahe = SMR_PTR_GET_LOCKED(&heap[i >> 1]); + ahe = SMR_PTR_GET_LOCKED(&heap[i]); + if (art_heap_entry_is_node(ahe)) { + if (ahe != 0 && ahe != pahe) + return (art_heap_entry_to_node(ahe)); + } else { + struct art_node *an; + + heap = art_heap_entry_to_heap(ahe); + + an = art_iter_descend(ai, heap, pahe); + if (an != NULL) /* default route? */ + return (an); + + /* Start looping over the child table */ + at = art_heap_to_table(heap); + goto descend; + } } - if (nat != NULL) { - error = art_table_walk(ar, nat, f, arg); - art_table_free(ar, nat); - if (error != 0) - return (error); + /* + * Ascend back up to the parent + */ + parent = at->at_parent; + ai->ai_i = at->at_index + 1; + art_table_free(ai->ai_art, at); + + ai->ai_table = parent; + if (parent == NULL) { + /* The root table has no parent */ + break; } + + at = parent; + ai->ai_j = at->at_minfringe; + heap = at->at_heap; } - return (0); + return (NULL); } -int -art_walk_apply(struct art_root *ar, - struct art_node *an, struct art_node *next, - int (*f)(struct art_node *, void *), void *arg) +void +art_iter_close(struct art_iter *ai) { - int error = 0; + struct art_table *at, *parent; - if ((an != NULL) && (an != next)) { - rw_exit_write(&ar->ar_lock); - error = (*f)(an, arg); - rw_enter_write(&ar->ar_lock); + for (at = ai->ai_table; at != NULL; at = parent) { + parent = at->at_parent; + art_table_free(ai->ai_art, at); } - return (error); + ai->ai_table = NULL; } +int +art_walk(struct art *art, int (*f)(struct art_node *, void *), void *arg) +{ + struct art_iter ai; + struct art_node *an; + int error = 0; + + ART_FOREACH(an, art, &ai) { + error = f(an, arg); + if (error != 0) { + art_iter_close(&ai); + return (error); + } + } + + return (0); +} /* * Create a table and use the given index to set its default route. @@ -686,89 +892,90 @@ art_walk_apply(struct art_root *ar, * Note: This function does not modify the root or the parent. */ struct art_table * -art_table_get(struct art_root *ar, struct art_table *parent, int j) +art_table_get(struct art *art, struct art_table *parent, unsigned int j) { struct art_table *at; - struct art_node *node; - void *at_heap; - uint32_t lvl; + art_heap_entry *heap; + unsigned int level; + unsigned int bits; - KASSERT(j != 0 && j != 1); + KASSERT(j != ART_HEAP_IDX_TABLE && j != ART_HEAP_IDX_DEFAULT); KASSERT(parent != NULL || j == -1); - if (parent != NULL) - lvl = parent->at_level + 1; - else - lvl = 0; - - KASSERT(lvl < ar->ar_nlvl); + level = (parent != NULL) ? 
parent->at_level + 1 : 0; + KASSERT(level < art->art_nlevels); at = pool_get(&at_pool, PR_NOWAIT|PR_ZERO); if (at == NULL) return (NULL); - switch (AT_HEAPSIZE(ar->ar_bits[lvl])) { - case AT_HEAPSIZE(4): - at_heap = pool_get(&at_heap_4_pool, PR_NOWAIT|PR_ZERO); + bits = art->art_levels[level]; + switch (bits) { + case 4: + heap = pool_get(&at_heap_4_pool, PR_NOWAIT|PR_ZERO); break; - case AT_HEAPSIZE(8): - at_heap = pool_get(&at_heap_8_pool, PR_NOWAIT|PR_ZERO); + case 8: + heap = pool_get(&at_heap_8_pool, PR_NOWAIT|PR_ZERO); break; default: - panic("incorrect stride length %u", ar->ar_bits[lvl]); + panic("incorrect stride length %u", bits); } - if (at_heap == NULL) { + if (heap == NULL) { pool_put(&at_pool, at); return (NULL); } + heap[ART_HEAP_IDX_TABLE] = (uintptr_t)at; + at->at_parent = parent; at->at_index = j; - at->at_minfringe = (1 << ar->ar_bits[lvl]); - at->at_level = lvl; - at->at_bits = ar->ar_bits[lvl]; - at->at_heap = at_heap; + at->at_minfringe = (1 << bits); + at->at_level = level; + at->at_bits = bits; + at->at_heap = heap; at->at_refcnt = 0; if (parent != NULL) { - node = srp_get_locked(&parent->at_heap[j].node); - /* node isn't being deleted, no srp_finalize needed */ - srp_swap_locked(&at->at_default, node); + art_heap_entry ahe; + at->at_offset = (parent->at_offset + parent->at_bits); + + ahe = SMR_PTR_GET_LOCKED(&parent->at_heap[j]); + SMR_PTR_SET_LOCKED(&heap[ART_HEAP_IDX_DEFAULT], ahe); } return (at); } - /* * Delete a table and use its index to restore its parent's default route. * * Note: Modify its parent to unlink the table from it. */ struct art_table * -art_table_put(struct art_root *ar, struct art_table *at) +art_table_put(struct art *art, struct art_table *at) { struct art_table *parent = at->at_parent; - struct art_node *node; - uint32_t j = at->at_index; + unsigned int j = at->at_index; KASSERT(at->at_refcnt == 0); KASSERT(j != 0 && j != 1); if (parent != NULL) { + art_heap_entry ahe; + KASSERT(j != -1); KASSERT(at->at_level == parent->at_level + 1); KASSERT(parent->at_refcnt >= 1); /* Give the route back to its parent. */ - node = srp_get_locked(&at->at_default); - srp_swap_locked(&parent->at_heap[j].node, node); + ahe = SMR_PTR_GET_LOCKED(&at->at_heap[ART_HEAP_IDX_DEFAULT]); + SMR_PTR_SET_LOCKED(&parent->at_heap[j], ahe); } else { KASSERT(j == -1); KASSERT(at->at_level == 0); - srp_swap_locked(&ar->ar_root, NULL); + SMR_PTR_SET_LOCKED(&art->art_root, NULL); } mtx_enter(&art_table_gc_mtx); @@ -791,19 +998,16 @@ art_table_gc(void *null) art_table_gc_list = NULL; mtx_leave(&art_table_gc_mtx); + smr_barrier(); + while (at != NULL) { next = at->at_parent; - if (at->at_level == 0) - srp_finalize(at, "arttfini"); - else - srp_finalize(ASNODE(at), "arttfini"); - - switch (AT_HEAPSIZE(at->at_bits)) { - case AT_HEAPSIZE(4): + switch (at->at_bits) { + case 4: pool_put(&at_heap_4_pool, at->at_heap); break; - case AT_HEAPSIZE(8): + case 8: pool_put(&at_heap_8_pool, at->at_heap); break; default: @@ -839,40 +1043,45 @@ art_table_gc(void *null) * art_allot(at, k, old, new); * } */ -void -art_allot(struct art_table *at, int i, struct art_node *old, - struct art_node *new) -{ - struct art_node *node, *dflt; - int k = i; +static void +art_allot(struct art_table *at, unsigned int i, + art_heap_entry oahe, art_heap_entry nahe) +{ + art_heap_entry ahe, *ahep; + art_heap_entry *heap; + unsigned int k = i; KASSERT(i < at->at_minfringe); + heap = at->at_heap; + again: k <<= 1; if (k < at->at_minfringe) goto nonfringe; /* Change fringe nodes. 
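The hunk above elides most of the allotment description, but the recursive definition it refers to can be sketched as follows (assumed minimal types, and ignoring the subtable default slots the kernel version also has to look through; art_allot below unrolls this recursion into gotos):

#include <stdint.h>

struct table {
	uintptr_t	*heap;
	unsigned int	 minfringe;
};

static void
allot(struct table *t, unsigned int i, uintptr_t old, uintptr_t new)
{
	/* only entries that inherited the old route are updated */
	if (t->heap[i] != old)
		return;
	t->heap[i] = new;

	/* non-fringe indices propagate the change to both children */
	if (i < t->minfringe) {
		allot(t, i << 1, old, new);
		allot(t, (i << 1) + 1, old, new);
	}
}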
*/ - while (1) { - node = srp_get_locked(&at->at_heap[k].node); - if (!ISLEAF(node)) { - dflt = srp_get_locked(&SUBTABLE(node)->at_default); - if (dflt == old) { - srp_swap_locked(&SUBTABLE(node)->at_default, - new); - } - } else if (node == old) { - srp_swap_locked(&at->at_heap[k].node, new); + for (;;) { + ahep = &heap[k]; + ahe = SMR_PTR_GET_LOCKED(ahep); + + if (!art_heap_entry_is_node(ahe)) { + art_heap_entry *child = art_heap_entry_to_heap(ahe); + ahep = &child[ART_HEAP_IDX_DEFAULT]; + ahe = SMR_PTR_GET_LOCKED(ahep); } + + if (ahe == oahe) + SMR_PTR_SET_LOCKED(ahep, nahe); + if (k % 2) goto moveup; k++; } nonfringe: - node = srp_get_locked(&at->at_heap[k].node); - if (node == old) + ahe = SMR_PTR_GET_LOCKED(&heap[k]); + if (ahe == oahe) goto again; moveon: if (k % 2) @@ -881,7 +1090,7 @@ moveon: goto nonfringe; moveup: k >>= 1; - srp_swap_locked(&at->at_heap[k].node, new); + SMR_PTR_SET_LOCKED(&heap[k], nahe); /* Change non-fringe node. */ if (k != i) @@ -889,7 +1098,7 @@ moveup: } struct art_node * -art_get(uint8_t plen) +art_get(const uint8_t *addr, unsigned int plen) { struct art_node *an; @@ -897,17 +1106,25 @@ art_get(uint8_t plen) if (an == NULL) return (NULL); - an->an_plen = plen; - SRPL_INIT(&an->an_rtlist); + art_node_init(an, addr, plen); return (an); } void -art_put(struct art_node *an) +art_node_init(struct art_node *an, const uint8_t *addr, unsigned int plen) { - KASSERT(SRPL_EMPTY_LOCKED(&an->an_rtlist)); + size_t len; + + len = roundup(plen, 8) / 8; + KASSERT(len <= sizeof(an->an_addr)); + memcpy(an->an_addr, addr, len); + an->an_plen = plen; +} +void +art_put(struct art_node *an) +{ mtx_enter(&art_node_gc_mtx); an->an_gc = art_node_gc_list; art_node_gc_list = an; @@ -919,17 +1136,17 @@ art_put(struct art_node *an) void art_gc(void *null) { - struct art_node *an, *next; + struct art_node *an; mtx_enter(&art_node_gc_mtx); an = art_node_gc_list; art_node_gc_list = NULL; mtx_leave(&art_node_gc_mtx); - while (an != NULL) { - next = an->an_gc; + smr_barrier(); - srp_finalize(an, "artnfini"); + while (an != NULL) { + struct art_node *next = an->an_gc; pool_put(&an_pool, an); Index: art.h =================================================================== RCS file: /cvs/src/sys/net/art.h,v diff -u -p -r1.25 art.h --- art.h 11 Nov 2023 12:17:50 -0000 1.25 +++ art.h 12 Jun 2025 12:47:32 -0000 @@ -19,94 +19,86 @@ #ifndef _NET_ART_H_ #define _NET_ART_H_ -#include -#include - -#define ART_MAXLVL 32 /* We currently use 32 levels for IPv6. */ - /* - * Root of the ART tables, equivalent to the radix head. + * Allotment Routing Table (ART) + * + * Yoichi Hariguchi paper can be found at: + * http://www.hariguchi.org/art/art.pdf + * + * Locking: + * + * Modification (ie, art_insert or art_delete) and iteration over + * the ART must be serialised by the caller. Lookups (ie, art_match + * and art_lookup) are safe to run within an SMR critical section. * - * Locks used to protect struct members in this file: - * I immutable after creation - * l root's `ar_lock' - * K kernel lock - * For SRP related structures that allow lock-free reads, the write lock - * is indicated below. + * Iteration requires serialisation as it manipulates the reference + * counts on tables as it traverses the tree. The iterator maintains + * these references until it runs out of entries, or art_iter_close + * is called. This allows code iterating over the ART to release any + * locks that were serialising the calls to art_iter_open or art_iter_next.
+ * Note, the iterator does not hold a reference to the art_node + * structure; it must be accounted for separately, or not accessed + * after the locks are released. */ -struct art_root { - struct srp ar_root; /* [l] First table */ - struct rwlock ar_lock; /* [] Serialise modifications */ - uint8_t ar_bits[ART_MAXLVL]; /* [I] Per level stride */ - uint8_t ar_nlvl; /* [I] Number of levels */ - uint8_t ar_alen; /* [I] Address length in bits */ - uint8_t ar_off; /* [I] Offset of key in bytes */ - struct sockaddr *ar_source; /* [N] use optional src addr */ -}; -#define ISLEAF(e) (((unsigned long)(e) & 1) == 0) -#define SUBTABLE(e) ((struct art_table *)((unsigned long)(e) & ~1)) -#define ASNODE(t) ((struct art_node *)((unsigned long)(t) | 1)) +typedef uintptr_t art_heap_entry; /* - * Allotment Table. + * Root of the ART, equivalent to the radix head. */ -struct art_table { - struct art_table *at_parent; /* Parent table */ - uint32_t at_index; /* Index in the parent table */ - uint32_t at_minfringe; /* Index that fringe begins */ - uint32_t at_level; /* Level of the table */ - uint8_t at_bits; /* Stride length of the table */ - uint8_t at_offset; /* Sum of parents' stride len */ - - /* - * Items stored in the heap are pointers to nodes, in the leaf - * case, or tables otherwise. One exception is index 0 which - * is a route counter. - */ - union { - struct srp node; - unsigned long count; - } *at_heap; /* Array of 2^(slen+1) items */ -}; -#define at_refcnt at_heap[0].count/* Refcounter (1 per different route) */ -#define at_default at_heap[1].node /* Default route (was in parent heap) */ -/* Heap size for an ART table of stride length ``slen''. */ -#define AT_HEAPSIZE(slen) ((1 << ((slen) + 1)) * sizeof(void *)) - -/* - * Forward declaration needed for the list of mpath routes - * attached to a single ART node. - */ -struct rtentry; +struct art { + art_heap_entry *art_root; + const unsigned int *art_levels; + unsigned int art_nlevels; + unsigned int art_alen; +}; /* - * A node is the internal representation of a route entry.
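The struct art_node that follows carries no user data beyond an_value, so callers either hang their state off an_value (as the rtable code below does) or embed the node at the head of a larger record and cast back (as if_wg's struct wg_aip does). A sketch of the embedding pattern with an assumed container type:

struct my_route {
	struct art_node	mr_node;	/* must stay first for the cast */
	int		mr_flags;
};

static struct my_route *
my_route_from_node(struct art_node *an)
{
	return ((struct my_route *)an);
}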
+ * ART nodes, what is added to the ART */ struct art_node { + void *an_value; union { - SRPL_HEAD(, rtentry) an__rtlist; /* Route related to this node */ - struct art_node *an__gc; /* Entry on GC list */ - } an_pointer; - uint8_t an_plen; /* Prefix length */ + struct art_node *an__gc; + uint8_t an__addr[16]; + } an__u; +#define an_gc an__u.an__gc +#define an_addr an__u.an__addr + unsigned int an_plen; }; -#define an_rtlist an_pointer.an__rtlist -#define an_gc an_pointer.an__gc -void art_init(void); -struct art_root *art_alloc(unsigned int, unsigned int, unsigned int); -struct art_node *art_insert(struct art_root *, struct art_node *, const void *, - int); -struct art_node *art_delete(struct art_root *, struct art_node *, const void *, - int); -struct art_node *art_match(struct art_root *, const void *, struct srp_ref *); -struct art_node *art_lookup(struct art_root *, const void *, int, - struct srp_ref *); -int art_walk(struct art_root *, - int (*)(struct art_node *, void *), void *); - -struct art_node *art_get(uint8_t); +void art_boot(void); +struct art *art_alloc(unsigned int); +void art_init(struct art *, unsigned int); +struct art_node *art_insert(struct art *, struct art_node *); +struct art_node *art_delete(struct art *, const void *, unsigned int); +struct art_node *art_match(struct art *, const void *); +struct art_node *art_lookup(struct art *, const void *, unsigned int); +int art_is_empty(struct art *); + +struct art_node *art_get(const uint8_t *, unsigned int); +void art_node_init(struct art_node *, + const uint8_t *, unsigned int); void art_put(struct art_node *); + +struct art_iter { + struct art *ai_art; + struct art_table *ai_table; + unsigned int ai_j; + unsigned int ai_i; +}; + +struct art_node *art_iter_open(struct art *, struct art_iter *); +struct art_node *art_iter_next(struct art_iter *); +void art_iter_close(struct art_iter *); + +#define ART_FOREACH(_an, _art, _ai) \ + for ((_an) = art_iter_open((_art), (_ai)); \ + (_an) != NULL; \ + (_an) = art_iter_next((_ai))) + +int art_walk(struct art *, + int (*)(struct art_node *, void *), void *); #endif /* _NET_ART_H_ */ Index: if_tun.c =================================================================== RCS file: /cvs/src/sys/net/if_tun.c,v diff -u -p -r1.251 if_tun.c --- if_tun.c 2 Mar 2025 21:28:32 -0000 1.251 +++ if_tun.c 12 Jun 2025 12:47:33 -0000 @@ -318,8 +318,6 @@ tun_clone_destroy(struct ifnet *ifp) if (vfinddev(dev, VCHR, &vp)) VOP_REVOKE(vp, REVOKEALL); - - KASSERT(sc->sc_dev == 0); } /* prevent userland from getting to the device again */ Index: if_wg.c =================================================================== RCS file: /cvs/src/sys/net/if_wg.c,v diff -u -p -r1.42 if_wg.c --- if_wg.c 17 Apr 2025 12:42:50 -0000 1.42 +++ if_wg.c 12 Jun 2025 12:47:33 -0000 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -251,10 +252,11 @@ struct wg_softc { struct socket *sc_so6; #endif + struct rwlock sc_aip_lock; size_t sc_aip_num; - struct art_root *sc_aip4; + struct art *sc_aip4; #ifdef INET6 - struct art_root *sc_aip6; + struct art *sc_aip6; #endif struct rwlock sc_peer_lock; @@ -290,7 +292,7 @@ void wg_peer_counters_add(struct wg_peer int wg_aip_add(struct wg_softc *, struct wg_peer *, struct wg_aip_io *); struct wg_peer * - wg_aip_lookup(struct art_root *, void *); + wg_aip_lookup(struct art *, void *); int wg_aip_remove(struct wg_softc *, struct wg_peer *, struct wg_aip_io *); @@ -609,7 +611,7 @@ wg_peer_counters_add(struct wg_peer *pee int wg_aip_add(struct wg_softc *sc, struct 
wg_peer *peer, struct wg_aip_io *d) { - struct art_root *root; + struct art *root; struct art_node *node; struct wg_aip *aip; int ret = 0; @@ -625,8 +627,10 @@ wg_aip_add(struct wg_softc *sc, struct w if ((aip = pool_get(&wg_aip_pool, PR_NOWAIT|PR_ZERO)) == NULL) return ENOBUFS; - rw_enter_write(&root->ar_lock); - node = art_insert(root, &aip->a_node, &d->a_addr, d->a_cidr); + art_node_init(&aip->a_node, &d->a_addr.addr_bytes, d->a_cidr); + + rw_enter_write(&sc->sc_aip_lock); + node = art_insert(root, &aip->a_node); if (node == &aip->a_node) { aip->a_peer = peer; @@ -642,18 +646,18 @@ wg_aip_add(struct wg_softc *sc, struct w aip->a_peer = peer; } } - rw_exit_write(&root->ar_lock); + rw_exit_write(&sc->sc_aip_lock); return ret; } struct wg_peer * -wg_aip_lookup(struct art_root *root, void *addr) +wg_aip_lookup(struct art *root, void *addr) { - struct srp_ref sr; struct art_node *node; - node = art_match(root, addr, &sr); - srp_leave(&sr); + smr_read_enter(); + node = art_match(root, addr); + smr_read_leave(); return node == NULL ? NULL : ((struct wg_aip *) node)->a_peer; } @@ -661,8 +665,7 @@ wg_aip_lookup(struct art_root *root, voi int wg_aip_remove(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d) { - struct srp_ref sr; - struct art_root *root; + struct art *root; struct art_node *node; struct wg_aip *aip; int ret = 0; @@ -675,23 +678,21 @@ wg_aip_remove(struct wg_softc *sc, struc default: return EAFNOSUPPORT; } - rw_enter_write(&root->ar_lock); - if ((node = art_lookup(root, &d->a_addr, d->a_cidr, &sr)) == NULL) { + rw_enter_write(&sc->sc_aip_lock); + if ((node = art_lookup(root, &d->a_addr, d->a_cidr)) == NULL) { ret = ENOENT; } else if (((struct wg_aip *) node)->a_peer != peer) { ret = EXDEV; } else { aip = (struct wg_aip *)node; - if (art_delete(root, node, &d->a_addr, d->a_cidr) == NULL) + if (art_delete(root, &d->a_addr, d->a_cidr) == NULL) panic("art_delete failed to delete node %p", node); sc->sc_aip_num--; LIST_REMOVE(aip, a_entry); pool_put(&wg_aip_pool, aip); } - - srp_leave(&sr); - rw_exit_write(&root->ar_lock); + rw_exit_write(&sc->sc_aip_lock); return ret; } @@ -2726,10 +2727,11 @@ wg_clone_create(struct if_clone *ifc, in #endif sc->sc_aip_num = 0; - if ((sc->sc_aip4 = art_alloc(0, 32, 0)) == NULL) + rw_init(&sc->sc_aip_lock, "wgaip"); + if ((sc->sc_aip4 = art_alloc(32)) == NULL) goto ret_02; #ifdef INET6 - if ((sc->sc_aip6 = art_alloc(0, 128, 0)) == NULL) + if ((sc->sc_aip6 = art_alloc(128)) == NULL) goto ret_03; #endif Index: if_wg.h =================================================================== RCS file: /cvs/src/sys/net/if_wg.h,v diff -u -p -r1.6 if_wg.h --- if_wg.h 13 Oct 2024 00:53:21 -0000 1.6 +++ if_wg.h 12 Jun 2025 12:47:33 -0000 @@ -46,6 +46,7 @@ struct wg_aip_io { sa_family_t a_af; int a_cidr; union wg_aip_addr { + uint8_t addr_bytes; struct in_addr addr_ipv4; struct in6_addr addr_ipv6; } a_addr; Index: route.h =================================================================== RCS file: /cvs/src/sys/net/route.h,v diff -u -p -r1.216 route.h --- route.h 16 Mar 2025 21:58:08 -0000 1.216 +++ route.h 12 Jun 2025 12:47:33 -0000 @@ -40,7 +40,7 @@ * I immutable after creation * N net lock * X exclusive net lock, or shared net lock + kernel lock - * R art (rtable) lock + * R rtable lock * r per route entry mutex rt_mtx * L arp/nd6/etc lock for updates, net lock for reads * T rttimer_mtx route timer lists @@ -117,7 +117,7 @@ struct rttimer; struct rtentry { struct sockaddr *rt_dest; /* [I] destination */ - SRPL_ENTRY(rtentry) rt_next; /* [R] next mpath 
entry to our dst */ + struct rtentry *rt_next; /* [R] next mpath entry to our dst */ struct sockaddr *rt_gateway; /* [X] gateway address */ struct ifaddr *rt_ifa; /* [N] interface addr to use */ caddr_t rt_llinfo; /* [L] pointer to link level info or Index: rtable.c =================================================================== RCS file: /cvs/src/sys/net/rtable.c,v diff -u -p -r1.87 rtable.c --- rtable.c 9 Apr 2024 12:53:08 -0000 1.87 +++ rtable.c 12 Jun 2025 12:47:33 -0000 @@ -26,12 +26,21 @@ #include #include #include +#include #endif #include #include #include +struct rtable { + struct rwlock r_lock; + struct art *r_art; /* [I] */ + unsigned int r_off; /* [I] Offset of key in bytes */ + + struct sockaddr *r_source; /* [N] use optional src addr */ +}; + /* * Structures used by rtable_get() to retrieve the corresponding * routing table for a given pair of ``af'' and ``rtableid''. @@ -85,8 +94,8 @@ void rtmap_dtor(void *, void *); struct srp_gc rtmap_gc = SRP_GC_INITIALIZER(rtmap_dtor, NULL); void rtable_init_backend(void); -void *rtable_alloc(unsigned int, unsigned int, unsigned int); -void *rtable_get(unsigned int, sa_family_t); +struct rtable *rtable_alloc(unsigned int, unsigned int, unsigned int); +struct rtable *rtable_get(unsigned int, sa_family_t); void rtmap_init(void) @@ -193,7 +202,7 @@ int rtable_add(unsigned int id) { const struct domain *dp; - void *tbl; + struct rtable *tbl; struct rtmap *map; struct dommp *dmm; sa_family_t af; @@ -244,11 +253,11 @@ out: return (error); } -void * +struct rtable * rtable_get(unsigned int rtableid, sa_family_t af) { struct rtmap *map; - void *tbl = NULL; + struct rtable *tbl = NULL; struct srp_ref sr; if (af >= nitems(af2idx) || af2idx[af] == 0) @@ -286,7 +295,7 @@ rtable_empty(unsigned int rtableid) { const struct domain *dp; int i; - struct art_root *tbl; + struct rtable *tbl; for (i = 0; (dp = domains[i]) != NULL; i++) { if (dp->dom_rtoffset == 0) @@ -295,7 +304,7 @@ rtable_empty(unsigned int rtableid) tbl = rtable_get(rtableid, dp->dom_family); if (tbl == NULL) continue; - if (tbl->ar_root.ref != NULL) + if (!art_is_empty(tbl->r_art)) return (0); } @@ -350,40 +359,51 @@ rtable_l2set(unsigned int rtableid, unsi } -static inline const uint8_t *satoaddr(struct art_root *, +static inline const uint8_t *satoaddr(struct rtable *, const struct sockaddr *); -int an_match(struct art_node *, const struct sockaddr *, int); -void rtentry_ref(void *, void *); -void rtentry_unref(void *, void *); - void rtable_mpath_insert(struct art_node *, struct rtentry *); -struct srpl_rc rt_rc = SRPL_RC_INITIALIZER(rtentry_ref, rtentry_unref, NULL); - void rtable_init_backend(void) { - art_init(); + art_boot(); } -void * +struct rtable * rtable_alloc(unsigned int rtableid, unsigned int alen, unsigned int off) { - return (art_alloc(rtableid, alen, off)); + struct rtable *r; + + r = malloc(sizeof(*r), M_RTABLE, M_NOWAIT|M_ZERO); + if (r == NULL) + return (r); + + + r->r_art = art_alloc(alen); + if (r->r_art == NULL) { + free(r, M_RTABLE, sizeof(*r)); + return (NULL); + } + + rw_init(&r->r_lock, "rtable"); + r->r_off = off; + r->r_source = NULL; + + return (r); } int rtable_setsource(unsigned int rtableid, int af, struct sockaddr *src) { - struct art_root *ar; + struct rtable *r; NET_ASSERT_LOCKED_EXCLUSIVE(); - if ((ar = rtable_get(rtableid, af)) == NULL) + if ((r = rtable_get(rtableid, af)) == NULL) return (EAFNOSUPPORT); - ar->ar_source = src; + r->r_source = src; return (0); } @@ -391,15 +411,15 @@ rtable_setsource(unsigned int rtableid, struct sockaddr * 
rtable_getsource(unsigned int rtableid, int af) { - struct art_root *ar; + struct rtable *r; NET_ASSERT_LOCKED(); - ar = rtable_get(rtableid, af); - if (ar == NULL) + r = rtable_get(rtableid, af); + if (r == NULL) return (NULL); - return (ar->ar_source); + return (r->r_source); } void @@ -419,37 +439,34 @@ struct rtentry * rtable_lookup(unsigned int rtableid, const struct sockaddr *dst, const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio) { - struct art_root *ar; + struct rtable *r; struct art_node *an; struct rtentry *rt = NULL; - struct srp_ref sr, nsr; const uint8_t *addr; int plen; - ar = rtable_get(rtableid, dst->sa_family); - if (ar == NULL) + r = rtable_get(rtableid, dst->sa_family); + if (r == NULL) return (NULL); - addr = satoaddr(ar, dst); + addr = satoaddr(r, dst); - /* No need for a perfect match. */ + smr_read_enter(); if (mask == NULL) { - an = art_match(ar, addr, &nsr); - if (an == NULL) - goto out; + /* No need for a perfect match. */ + an = art_match(r->r_art, addr); } else { plen = rtable_satoplen(dst->sa_family, mask); if (plen == -1) return (NULL); - an = art_lookup(ar, addr, plen, &nsr); - - /* Make sure we've got a perfect match. */ - if (!an_match(an, dst, plen)) - goto out; + an = art_lookup(r->r_art, addr, plen); } + if (an == NULL) + goto out; - SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) { + for (rt = SMR_PTR_GET(&an->an_value); rt != NULL; + rt = SMR_PTR_GET(&rt->rt_next)) { if (prio != RTP_ANY && (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK)) continue; @@ -464,9 +481,8 @@ rtable_lookup(unsigned int rtableid, con if (rt != NULL) rtref(rt); - SRPL_LEAVE(&sr); out: - srp_leave(&nsr); + smr_read_leave(); return (rt); } @@ -474,44 +490,41 @@ out: struct rtentry * rtable_match(unsigned int rtableid, const struct sockaddr *dst, uint32_t *src) { - struct art_root *ar; + struct rtable *r; struct art_node *an; struct rtentry *rt = NULL; - struct srp_ref sr, nsr; const uint8_t *addr; int hash; + uint8_t prio; - ar = rtable_get(rtableid, dst->sa_family); - if (ar == NULL) + r = rtable_get(rtableid, dst->sa_family); + if (r == NULL) return (NULL); - addr = satoaddr(ar, dst); + addr = satoaddr(r, dst); - an = art_match(ar, addr, &nsr); + smr_read_enter(); + an = art_match(r->r_art, addr); if (an == NULL) goto out; - rt = SRPL_FIRST(&sr, &an->an_rtlist); - if (rt == NULL) { - SRPL_LEAVE(&sr); - goto out; - } - rtref(rt); - SRPL_LEAVE(&sr); + rt = SMR_PTR_GET(&an->an_value); + KASSERT(rt != NULL); + prio = rt->rt_priority; /* Gateway selection by Hash-Threshold (RFC 2992) */ if ((hash = rt_hash(rt, dst, src)) != -1) { struct rtentry *mrt; - int threshold, npaths = 0; + int threshold, npaths = 1; KASSERT(hash <= 0xffff); - SRPL_FOREACH(mrt, &sr, &an->an_rtlist, rt_next) { - /* Only count nexthops with the same priority. */ - if (mrt->rt_priority == rt->rt_priority) + /* Only count nexthops with the same priority. */ + mrt = rt; + while ((mrt = SMR_PTR_GET(&mrt->rt_next)) != NULL) { + if (mrt->rt_priority == prio) npaths++; } - SRPL_LEAVE(&sr); threshold = (0xffff / npaths) + 1; @@ -520,27 +533,22 @@ rtable_match(unsigned int rtableid, cons * route list attached to the node, so we won't necessarily * have the same number of routes. for most modifications, * we'll pick a route that we wouldn't have if we only saw the - * list before or after the change. if we were going to use - * the last available route, but it got removed, we'll hit - * the end of the list and then pick the first route. + * list before or after the change. 
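rtable_match above selects among equal-priority multipath routes by Hash-Threshold (RFC 2992): the 16 bit flow hash is divided into npaths equal regions and each region maps to one next hop. A standalone sketch of the arithmetic with assumed values:

#include <assert.h>

int
main(void)
{
	int npaths = 3;
	int threshold = (0xffff / npaths) + 1;
	int hash = 0x8000;	/* would come from rt_hash() */
	int region = 0;

	while (hash > threshold) {
		hash -= threshold;
		region++;
	}

	/* 0x8000 falls in the middle of the three regions */
	assert(region == 1);
	return (0);
}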
*/ - - mrt = SRPL_FIRST(&sr, &an->an_rtlist); - while (hash > threshold && mrt != NULL) { - if (mrt->rt_priority == rt->rt_priority) + mrt = rt; + while (hash > threshold) { + if (mrt->rt_priority == prio) { + rt = mrt; hash -= threshold; - mrt = SRPL_FOLLOW(&sr, mrt, rt_next); - } - - if (mrt != NULL) { - rtref(mrt); - rtfree(rt); - rt = mrt; + } + mrt = SMR_PTR_GET(&mrt->rt_next); + if (mrt == NULL) + break; } - SRPL_LEAVE(&sr); } + rtref(rt); out: - srp_leave(&nsr); + smr_read_leave(); return (rt); } @@ -549,35 +557,59 @@ rtable_insert(unsigned int rtableid, str const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio, struct rtentry *rt) { - struct rtentry *mrt; - struct srp_ref sr; - struct art_root *ar; + struct rtable *r; struct art_node *an, *prev; const uint8_t *addr; int plen; unsigned int rt_flags; int error = 0; - ar = rtable_get(rtableid, dst->sa_family); - if (ar == NULL) + r = rtable_get(rtableid, dst->sa_family); + if (r == NULL) return (EAFNOSUPPORT); - addr = satoaddr(ar, dst); + addr = satoaddr(r, dst); plen = rtable_satoplen(dst->sa_family, mask); if (plen == -1) return (EINVAL); - rtref(rt); /* guarantee rtfree won't do anything during insert */ - rw_enter_write(&ar->ar_lock); + an = art_get(addr, plen); + if (an == NULL) + return (ENOMEM); + + /* prepare for immediate operation if insert succeeds */ + rt_flags = rt->rt_flags; + rt->rt_flags &= ~RTF_MPATH; + rt->rt_dest = dst; + rt->rt_plen = plen; + rt->rt_next = NULL; + + rtref(rt); /* take a ref for the table */ + an->an_value = rt; - /* Do not permit exactly the same dst/mask/gw pair. */ - an = art_lookup(ar, addr, plen, &sr); - srp_leave(&sr); /* an can't go away while we have the lock */ - if (an_match(an, dst, plen)) { - struct rtentry *mrt; - int mpathok = ISSET(rt->rt_flags, RTF_MPATH); + rw_enter_write(&r->r_lock); + prev = art_insert(r->r_art, an); + if (prev == NULL) { + error = ENOMEM; + goto put; + } + + if (prev != an) { + struct rtentry *mrt; + int mpathok = ISSET(rt_flags, RTF_MPATH); + int mpath = 0; - SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) { + /* + * An ART node with the same destination/netmask already + * exists. + */ + art_put(an); + an = prev; + + /* Do not permit exactly the same dst/mask/gw pair. */ + for (mrt = SMR_PTR_GET_LOCKED(&an->an_value); + mrt != NULL; + mrt = SMR_PTR_GET_LOCKED(&mrt->rt_next)) { if (prio != RTP_ANY && (mrt->rt_priority & RTP_MASK) != (prio & RTP_MASK)) continue; @@ -589,63 +621,34 @@ rtable_insert(unsigned int rtableid, str error = EEXIST; goto leave; } - } - } - - an = art_get(plen); - if (an == NULL) { - error = ENOBUFS; - goto leave; - } - - /* prepare for immediate operation if insert succeeds */ - rt_flags = rt->rt_flags; - rt->rt_flags &= ~RTF_MPATH; - rt->rt_dest = dst; - rt->rt_plen = plen; - SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next); - - prev = art_insert(ar, an, addr, plen); - if (prev != an) { - SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, - rt_next); - rt->rt_flags = rt_flags; - art_put(an); - - if (prev == NULL) { - error = ESRCH; - goto leave; + mpath = RTF_MPATH; } - an = prev; - - mrt = SRPL_FIRST_LOCKED(&an->an_rtlist); - KASSERT(mrt != NULL); - KASSERT((rt->rt_flags & RTF_MPATH) || mrt->rt_priority != prio); + /* The new route can be added to the list. 
*/ + if (mpath) { + SET(rt->rt_flags, RTF_MPATH); + + for (mrt = SMR_PTR_GET_LOCKED(&an->an_value); + mrt != NULL; + mrt = SMR_PTR_GET_LOCKED(&mrt->rt_next)) { + if ((mrt->rt_priority & RTP_MASK) != + (prio & RTP_MASK)) + continue; - /* - * An ART node with the same destination/netmask already - * exists, MPATH conflict must have been already checked. - */ - if (rt->rt_flags & RTF_MPATH) { - /* - * Only keep the RTF_MPATH flag if two routes have - * the same gateway. - */ - rt->rt_flags &= ~RTF_MPATH; - SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) { - if (mrt->rt_priority == prio) { - mrt->rt_flags |= RTF_MPATH; - rt->rt_flags |= RTF_MPATH; - } + SET(mrt->rt_flags, RTF_MPATH); } } /* Put newly inserted entry at the right place. */ rtable_mpath_insert(an, rt); } + rw_exit_write(&r->r_lock); + return (error); + +put: + art_put(an); leave: - rw_exit_write(&ar->ar_lock); + rw_exit_write(&r->r_lock); rtfree(rt); return (error); } @@ -654,118 +657,123 @@ int rtable_delete(unsigned int rtableid, const struct sockaddr *dst, const struct sockaddr *mask, struct rtentry *rt) { - struct art_root *ar; + struct rtable *r; struct art_node *an; - struct srp_ref sr; const uint8_t *addr; int plen; struct rtentry *mrt; - int npaths = 0; - int error = 0; - ar = rtable_get(rtableid, dst->sa_family); - if (ar == NULL) + r = rtable_get(rtableid, dst->sa_family); + if (r == NULL) return (EAFNOSUPPORT); - addr = satoaddr(ar, dst); + addr = satoaddr(r, dst); plen = rtable_satoplen(dst->sa_family, mask); if (plen == -1) return (EINVAL); - rtref(rt); /* guarantee rtfree won't do anything under ar_lock */ - rw_enter_write(&ar->ar_lock); - an = art_lookup(ar, addr, plen, &sr); - srp_leave(&sr); /* an can't go away while we have the lock */ - - /* Make sure we've got a perfect match. */ - if (!an_match(an, dst, plen)) { - error = ESRCH; - goto leave; + rw_enter_write(&r->r_lock); + an = art_lookup(r->r_art, addr, plen); + if (an == NULL) { + rw_exit_write(&r->r_lock); + return (ESRCH); } - /* - * If other multipath route entries are still attached to - * this ART node we only have to unlink it. - */ - SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) - npaths++; - - if (npaths > 1) { - KASSERT(refcnt_read(&rt->rt_refcnt) >= 1); - SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, - rt_next); - - mrt = SRPL_FIRST_LOCKED(&an->an_rtlist); - if (npaths == 2) - mrt->rt_flags &= ~RTF_MPATH; + /* If this is the only route in the list then we can delete the node */ + if (SMR_PTR_GET_LOCKED(&an->an_value) == rt && + SMR_PTR_GET_LOCKED(&rt->rt_next) == NULL) { + struct art_node *oan; + oan = art_delete(r->r_art, addr, plen); + if (oan != an) + panic("art %p changed shape during delete", r->r_art); + art_put(an); + /* + * XXX an and the rt ref could still be alive on other cpus. + * this currently works because of the NET_LOCK/KERNEL_LOCK + * but should be fixed if we want to do route lookups outside + * these locks. + */ + } else { + struct rtentry **prt = (struct rtentry **)&an->an_value; + struct rtentry *nrt; + unsigned int found = 0; + unsigned int npaths = 0; - goto leave; + /* + * If other multipath route entries are still attached to + * this ART node we only have to unlink it. 
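The removal that follows walks the route list through a pointer to the previous next pointer, so unlinking the head needs no special case. The same idea as a standalone sketch (the item type is assumed; the kernel code additionally goes through the SMR_PTR accessors):

struct item {
	struct item *next;
};

static void
unlink_item(struct item **head, struct item *victim)
{
	struct item **p = head;
	struct item *it;

	while ((it = *p) != NULL) {
		if (it == victim) {
			/* splice out and keep scanning from here */
			*p = it->next;
			continue;
		}
		p = &it->next;
	}
}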
+		while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
+			if (mrt == rt) {
+				found = 1;
+				SMR_PTR_SET_LOCKED(prt,
+				    SMR_PTR_GET_LOCKED(&mrt->rt_next));
+				continue;
+			}
+			if ((mrt->rt_priority & RTP_MASK) ==
+			    (rt->rt_priority & RTP_MASK)) {
+				npaths++;
+				nrt = mrt;
+			}
+			prt = &mrt->rt_next;
+		}
+		if (!found)
+			panic("removing non-existent route");
+		if (npaths == 1)
+			CLR(nrt->rt_flags, RTF_MPATH);
 	}
-
-	if (art_delete(ar, an, addr, plen) == NULL)
-		panic("art_delete failed to find node %p", an);
-	KASSERT(refcnt_read(&rt->rt_refcnt) >= 1);
-	SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);
-	art_put(an);
-
-leave:
-	rw_exit_write(&ar->ar_lock);
+	rw_exit_write(&r->r_lock);
 	rtfree(rt);
-	return (error);
-}
-
-struct rtable_walk_cookie {
-	int	(*rwc_func)(struct rtentry *, void *, unsigned int);
-	void	*rwc_arg;
-	struct rtentry	**rwc_prt;
-	unsigned int	rwc_rid;
-};
-
-/*
- * Helper for rtable_walk to keep the ART code free from any "struct rtentry".
- */
-int
-rtable_walk_helper(struct art_node *an, void *xrwc)
-{
-	struct srp_ref sr;
-	struct rtable_walk_cookie *rwc = xrwc;
-	struct rtentry *rt;
-	int error = 0;
-
-	SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
-		error = (*rwc->rwc_func)(rt, rwc->rwc_arg, rwc->rwc_rid);
-		if (error != 0)
-			break;
-	}
-	if (rwc->rwc_prt != NULL && rt != NULL) {
-		rtref(rt);
-		*rwc->rwc_prt = rt;
-	}
-	SRPL_LEAVE(&sr);
-
-	return (error);
+	return (0);
 }
 
 int
 rtable_walk(unsigned int rtableid, sa_family_t af, struct rtentry **prt,
     int (*func)(struct rtentry *, void *, unsigned int), void *arg)
 {
-	struct art_root *ar;
-	struct rtable_walk_cookie rwc;
+	struct rtable *r;
+	struct art_iter ai;
+	struct art_node *an;
 	int error;
 
-	ar = rtable_get(rtableid, af);
-	if (ar == NULL)
+	r = rtable_get(rtableid, af);
+	if (r == NULL)
 		return (EAFNOSUPPORT);
 
-	rwc.rwc_func = func;
-	rwc.rwc_arg = arg;
-	rwc.rwc_prt = prt;
-	rwc.rwc_rid = rtableid;
+	rw_enter_write(&r->r_lock);
+	ART_FOREACH(an, r->r_art, &ai) {
+		struct rtentry *rt, *nrt;
+
+		/*
+		 * ART nodes have a list of rtentries.
+		 */
+		rt = SMR_PTR_GET_LOCKED(&an->an_value);
+		rtref(rt);
+		do {
+			/* Get ready for the next entry. */
+			nrt = SMR_PTR_GET_LOCKED(&rt->rt_next);
+			if (nrt != NULL)
+				rtref(nrt);
+
+			/*
+			 * art_iter holds references so the topology
+			 * won't change.
+			 */
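+			/*
+			 * Drop the lock while func() runs; rt and nrt
+			 * are kept alive by the references taken above.
+			 */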
+			rw_exit_write(&r->r_lock);
+			error = func(rt, arg, rtableid);
+			rtfree(rt);
+			rw_enter_write(&r->r_lock);
+
+			if (error != 0) {
+				if (nrt != NULL)
+					rtfree(nrt);
+				art_iter_close(&ai);
+				break;
+			}
 
-	error = art_walk(ar, rtable_walk_helper, &rwc);
+			rt = nrt;
+		} while (rt != NULL);
+	}
+	rw_exit_write(&r->r_lock);
 
 	return (error);
 }
 
@@ -774,12 +782,12 @@ struct rtentry *
 rtable_iterate(struct rtentry *rt0)
 {
 	struct rtentry *rt = NULL;
-	struct srp_ref sr;
 
-	rt = SRPL_NEXT(&sr, rt0, rt_next);
+	smr_read_enter();
+	rt = SMR_PTR_GET(&rt0->rt_next);
 	if (rt != NULL)
 		rtref(rt);
-	SRPL_LEAVE(&sr);
+	smr_read_leave();
 	rtfree(rt0);
 	return (rt);
 }
 
@@ -794,27 +802,26 @@ int
 rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst, int plen,
     uint8_t prio, struct rtentry *rt)
 {
-	struct art_root *ar;
+	struct rtable *r;
 	struct art_node *an;
-	struct srp_ref sr;
 	const uint8_t *addr;
 	int error = 0;
 
-	ar = rtable_get(rtableid, dst->sa_family);
-	if (ar == NULL)
+	r = rtable_get(rtableid, dst->sa_family);
+	if (r == NULL)
 		return (EAFNOSUPPORT);
 
-	addr = satoaddr(ar, dst);
-
-	rw_enter_write(&ar->ar_lock);
-	an = art_lookup(ar, addr, plen, &sr);
-	srp_leave(&sr); /* an can't go away while we have the lock */
+	addr = satoaddr(r, dst);
 
-	/* Make sure we've got a perfect match. */
-	if (!an_match(an, dst, plen)) {
+	rw_enter_write(&r->r_lock);
+	an = art_lookup(r->r_art, addr, plen);
+	if (an == NULL) {
 		error = ESRCH;
-	} else if (SRPL_FIRST_LOCKED(&an->an_rtlist) == rt &&
-	    SRPL_NEXT_LOCKED(rt, rt_next) == NULL) {
+		goto leave;
+	}
+
+	if (SMR_PTR_GET_LOCKED(&an->an_value) == rt &&
+	    SMR_PTR_GET_LOCKED(&rt->rt_next) == NULL) {
 		/*
 		 * If there's only one entry on the list do not go
 		 * through an insert/remove cycle. This is done to
@@ -823,15 +830,22 @@ rtable_mpath_reprio(unsigned int rtablei
 		 */
 		rt->rt_priority = prio;
 	} else {
-		rtref(rt); /* keep rt alive in between remove and insert */
-		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist,
-		    rt, rtentry, rt_next);
+		struct rtentry **prt = (struct rtentry **)&an->an_value;
+		struct rtentry *mrt;
+
+		while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
+			if (mrt == rt)
+				break;
+			prt = &mrt->rt_next;
+		}
+		KASSERT(mrt != NULL);
+
+		SMR_PTR_SET_LOCKED(prt, SMR_PTR_GET_LOCKED(&rt->rt_next));
 		rt->rt_priority = prio;
 		rtable_mpath_insert(an, rt);
-		rtfree(rt);
 		error = EAGAIN;
 	}
-	rw_exit_write(&ar->ar_lock);
+leave:
+	rw_exit_write(&r->r_lock);
 
 	return (error);
 }
 
@@ -839,63 +853,19 @@ rtable_mpath_reprio(unsigned int rtablei
 void
 rtable_mpath_insert(struct art_node *an, struct rtentry *rt)
 {
-	struct rtentry *mrt, *prt = NULL;
+	struct rtentry *mrt, **prt;
 	uint8_t prio = rt->rt_priority;
 
-	if ((mrt = SRPL_FIRST_LOCKED(&an->an_rtlist)) == NULL) {
-		SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
-		return;
-	}
+	prt = (struct rtentry **)&an->an_value;
 
 	/* Iterate until we find the route to be placed after ``rt''. */
-	while (mrt->rt_priority <= prio && SRPL_NEXT_LOCKED(mrt, rt_next)) {
-		prt = mrt;
-		mrt = SRPL_NEXT_LOCKED(mrt, rt_next);
-	}
-
-	if (mrt->rt_priority <= prio) {
-		SRPL_INSERT_AFTER_LOCKED(&rt_rc, mrt, rt, rt_next);
-	} else if (prt != NULL) {
-		SRPL_INSERT_AFTER_LOCKED(&rt_rc, prt, rt, rt_next);
-	} else {
-		SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
+	while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
+		if (mrt->rt_priority >= prio)
+			break;
+		prt = &mrt->rt_next;
 	}
-}
-
-/*
- * Returns 1 if ``an'' perfectly matches (``dst'', ``plen''), 0 otherwise.
- */
-int
-an_match(struct art_node *an, const struct sockaddr *dst, int plen)
-{
-	struct rtentry *rt;
-	struct srp_ref sr;
-	int match;
-
-	if (an == NULL || an->an_plen != plen)
-		return (0);
-
-	rt = SRPL_FIRST(&sr, &an->an_rtlist);
-	match = (rt != NULL && memcmp(rt->rt_dest, dst, dst->sa_len) == 0);
-	SRPL_LEAVE(&sr);
-
-	return (match);
-}
-void
-rtentry_ref(void *null, void *xrt)
-{
-	struct rtentry *rt = xrt;
-
-	rtref(rt);
-}
-
-void
-rtentry_unref(void *null, void *xrt)
-{
-	struct rtentry *rt = xrt;
-
-	rtfree(rt);
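+	/*
+	 * Publish order matters for unlocked readers: set rt's next
+	 * pointer before linking rt into the list so a reader never
+	 * sees rt without a valid successor.
+	 */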
+	SMR_PTR_SET_LOCKED(&rt->rt_next, mrt);
+	SMR_PTR_SET_LOCKED(prt, rt);
 }
 
 /*
@@ -904,9 +874,9 @@ rtentry_unref(void *null, void *xrt)
  * of "struct sockaddr" used by this routing table.
  */
 static inline const uint8_t *
-satoaddr(struct art_root *at, const struct sockaddr *sa)
+satoaddr(struct rtable *r, const struct sockaddr *sa)
 {
-	return (((const uint8_t *)sa) + at->ar_off);
+	return (((const uint8_t *)sa) + r->r_off);
 }
 
 /*
Index: rtable.h
===================================================================
RCS file: /cvs/src/sys/net/rtable.h,v
diff -u -p -r1.30 rtable.h
--- rtable.h	13 May 2024 01:15:53 -0000	1.30
+++ rtable.h	12 Jun 2025 12:47:33 -0000
@@ -28,6 +28,8 @@
 #define rt_plen(rt)		((rt)->rt_plen)
 #define RT_ROOT(rt)		(0)
 
+struct rtable;
+
 int	rtable_satoplen(sa_family_t, const struct sockaddr *);
 void	rtable_init(void);