Index: sys/net/art.c
===================================================================
RCS file: /cvs/src/sys/net/art.c,v
diff -u -p -r1.33 art.c
--- sys/net/art.c	7 Jul 2025 07:09:05 -0000	1.33
+++ sys/net/art.c	7 Jul 2025 09:26:05 -0000
@@ -1,4 +1,4 @@
-/*	$OpenBSD: art.c,v 1.33 2025/07/07 07:09:05 dlg Exp $ */
+/*	$OpenBSD: art.c,v 1.31 2023/11/11 12:17:50 bluhm Exp $ */

 /*
  * Copyright (c) 2015 Martin Pieuchot
@@ -22,6 +22,58 @@
  *
  * Yoichi Hariguchi paper can be found at:
  *	http://www.hariguchi.org/art/art.pdf
+ *
+ * This implementation is tweaked to minimise pointer traversal
+ * during lookups by only accessing the heaps. It avoids following
+ * pointers to or through the art_table structures.
+ *
+ * The heap is an array of art_heap_entry values which have different
+ * meanings depending on their location. The majority of entries in
+ * the heap are used as pointers to nodes (struct art_node, aka leaf
+ * entries), or the next heap to traverse. These pointers are typed/
+ * tagged by the low bit in their heap value, and must be masked
+ * appropriately before use.
+ *
+ * The first two entries in the heap are not used by the search
+ * algorithm, so they are used to store data associated with this
+ * part of the heap. The first entry (heap[0]) stores a pointer to
+ * an art_table struct associated with this heap. The second entry
+ * (heap[1]) stores the node pointer which would be in the parent
+ * heap if this table wasn't using it. This node pointer is also known
+ * as the default entry/route for the current table in the ART.
+ *
+ * Nodes contain the exact information about the prefix they represent
+ * in the ART, ie, the address and prefix length, and a pointer to
+ * data associated with that prefix (an_value).
+ *
+ * The relationships between the separate data structures look
+ * like the following:
+ *
+ *       +------------------+
+ *       |    struct art    |
+ *       +------------------+           NULL
+ *         |                             ^
+ *         | art_root                    | at_parent
+ *         v                     heap[0] |
+ *       +------------------+ ----------> +------------------+
+ *       |       heap       |             | struct art_table |
+ *       +------------------+ <---------- +------------------+
+ *         |         at_heap              ^
+ *         | heap entry                   | at_parent
+ *         v                      heap[0] |
+ *       +------------------+ ----------> +------------------+
+ *       |       heap       |             | struct art_table |
+ *       +------------------+ <---------- +------------------+
+ *         |         at_heap
+ *         | node entry
+ *         v
+ *       +------------------+
+ *       | struct art_node  |
+ *       +------------------+
+ *         |
+ *         | an_value
+ *         v
+ *       "user" data
  */

 #ifndef _KERNEL
@@ -32,43 +84,39 @@
 #include
 #include
 #include
+#include
+#include
 #endif

 #include

-int			 art_bindex(struct art_table *, const uint8_t *, int);
-void			 art_allot(struct art_table *at, int, struct art_node *,
-			     struct art_node *);
-struct art_table	*art_table_get(struct art_root *, struct art_table *,
-			     int);
-struct art_table	*art_table_put(struct art_root *, struct art_table *);
-struct art_node		*art_table_insert(struct art_root *, struct art_table *,
+static void		 art_allot(struct art_table *at, unsigned int,
+			     art_heap_entry, art_heap_entry);
+struct art_table	*art_table_get(struct art *, struct art_table *,
+			     unsigned int);
+struct art_table	*art_table_put(struct art *, struct art_table *);
+struct art_node		*art_table_insert(struct art *, struct art_table *,
			     int, struct art_node *);
-struct art_node		*art_table_delete(struct art_root *, struct art_table *,
+struct art_node		*art_table_delete(struct art *, struct art_table *,
			     int, struct art_node *);
-struct art_table	*art_table_ref(struct art_root *, struct art_table *);
-int
art_table_free(struct art_root *, struct art_table *); -int art_table_walk(struct art_root *, struct art_table *, - int (*f)(struct art_node *, void *), void *); -int art_walk_apply(struct art_root *, - struct art_node *, struct art_node *, - int (*f)(struct art_node *, void *), void *); +struct art_table *art_table_ref(struct art *, struct art_table *); +int art_table_free(struct art *, struct art_table *); void art_table_gc(void *); void art_gc(void *); -struct pool an_pool, at_pool, at_heap_4_pool, at_heap_8_pool; +static struct pool an_pool, at_pool, at_heap_4_pool, at_heap_8_pool; -struct art_table *art_table_gc_list = NULL; -struct mutex art_table_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); -struct task art_table_gc_task = +static struct art_table *art_table_gc_list = NULL; +static struct mutex art_table_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); +static struct task art_table_gc_task = TASK_INITIALIZER(art_table_gc, NULL); -struct art_node *art_node_gc_list = NULL; -struct mutex art_node_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); -struct task art_node_gc_task = TASK_INITIALIZER(art_gc, NULL); +static struct art_node *art_node_gc_list = NULL; +static struct mutex art_node_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); +static struct task art_node_gc_task = TASK_INITIALIZER(art_gc, NULL); void -art_init(void) +art_boot(void) { pool_init(&an_pool, sizeof(struct art_node), 0, IPL_SOFTNET, 0, "art_node", NULL); @@ -83,55 +131,74 @@ art_init(void) /* * Per routing table initialization API function. */ -struct art_root * -art_alloc(unsigned int rtableid, unsigned int alen) -{ - struct art_root *ar; - int i; - ar = malloc(sizeof(*ar), M_RTABLE, M_NOWAIT|M_ZERO); - if (ar == NULL) - return (NULL); +static const unsigned int art_plen32_levels[] = { + 8, 4, 4, 4, 4, 4, 4 +}; + +static const unsigned int art_plen128_levels[] = { + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 +}; + +static const unsigned int art_plen20_levels[] = { + 4, 4, 4, 4, 4 +}; + +void +art_init(struct art *art, unsigned int alen) +{ + const unsigned int *levels; + unsigned int nlevels; +#ifdef DIAGNOSTIC + unsigned int i; + unsigned int bits = 0; +#endif switch (alen) { case 32: - ar->ar_alen = 32; - ar->ar_nlvl = 7; - ar->ar_bits[0] = 8; - for (i = 1; i < ar->ar_nlvl; i++) - ar->ar_bits[i] = 4; + levels = art_plen32_levels; + nlevels = nitems(art_plen32_levels); break; case 128: - ar->ar_alen = 128; - ar->ar_nlvl = 32; - for (i = 0; i < ar->ar_nlvl; i++) - ar->ar_bits[i] = 4; + levels = art_plen128_levels; + nlevels = nitems(art_plen128_levels); + break; + case 20: + levels = art_plen20_levels; + nlevels = nitems(art_plen20_levels); break; default: - printf("%s: incorrect address length %u\n", __func__, alen); - free(ar, M_RTABLE, sizeof(*ar)); - return (NULL); + panic("no configuration for alen %u", alen); + /* NOTREACHED */ } - rw_init(&ar->ar_lock, "art"); +#ifdef DIAGNOSTIC + for (i = 0; i < nlevels; i++) + bits += levels[i]; + + if (alen != bits) + panic("sum of levels %u != address len %u", bits, alen); +#endif /* DIAGNOSTIC */ - return (ar); + art->art_root = 0; + art->art_levels = levels; + art->art_nlevels = nlevels; + art->art_alen = alen; } -/* - * Return 1 if ``old'' and ``new`` are identical, 0 otherwise. 
- */ -static inline int -art_check_duplicate(struct art_root *ar, struct art_node *old, - struct art_node *new) +struct art * +art_alloc(unsigned int alen) { - if (old == NULL) - return (0); + struct art *art; - if (old->an_plen == new->an_plen) - return (1); + art = malloc(sizeof(*art), M_RTABLE, M_NOWAIT|M_ZERO); + if (art == NULL) + return (NULL); - return (0); + art_init(art, alen); + + return (art); } /* @@ -146,30 +213,31 @@ art_check_duplicate(struct art_root *ar, * 8bit-long tables, there's a maximum of 4 base indexes if the * prefix length is > 24. */ -int -art_bindex(struct art_table *at, const uint8_t *addr, int plen) +static unsigned int __pure +art_bindex(unsigned int offset, unsigned int bits, + const uint8_t *addr, unsigned int plen) { - uint8_t boff, bend; + unsigned int boff, bend; uint32_t k; - if (plen < at->at_offset || plen > (at->at_offset + at->at_bits)) - return (-1); + KASSERT(plen >= offset); + KASSERT(plen <= (offset + bits)); /* * We are only interested in the part of the prefix length * corresponding to the range of this table. */ - plen -= at->at_offset; + plen -= offset; /* * Jump to the first byte of the address containing bits * covered by this table. */ - addr += (at->at_offset / 8); + addr += (offset / 8); /* ``at'' covers the bit range between ``boff'' & ``bend''. */ - boff = (at->at_offset % 8); - bend = (at->at_bits + boff); + boff = (offset % 8); + bend = (bits + boff); KASSERT(bend <= 32); @@ -186,25 +254,13 @@ art_bindex(struct art_table *at, const u k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8); k |= addr[1] >> (16 - bend); } else { - k = (addr[0] >> (8 - bend)) & ((1 << at->at_bits) - 1); + k = (addr[0] >> (8 - bend)) & ((1 << bits) - 1); } /* * Single level base index formula: */ - return ((k >> (at->at_bits - plen)) + (1 << plen)); -} - -/* - * Single level lookup function. - * - * Return the fringe index of the part of ``addr'' - * corresponding to the range covered by the table ``at''. - */ -static inline int -art_findex(struct art_table *at, const uint8_t *addr) -{ - return art_bindex(at, addr, (at->at_offset + at->at_bits)); + return ((k >> (bits - plen)) + (1 << plen)); } /* @@ -213,60 +269,94 @@ art_findex(struct art_table *at, const u * Return the best existing match for a destination. */ struct art_node * -art_match(struct art_root *ar, const void *addr, struct srp_ref *nsr) +art_match(struct art *art, const void *addr) { - struct srp_ref dsr, ndsr; - void *entry; - struct art_table *at; - struct art_node *dflt, *ndflt; - int j; - - entry = srp_enter(nsr, &ar->ar_root); - at = entry; + unsigned int offset = 0; + unsigned int level = 0; + art_heap_entry *heap; + art_heap_entry ahe, dahe = 0; + struct art_node *an; + int j; - if (at == NULL) - goto done; + SMR_ASSERT_CRITICAL(); - /* - * Remember the default route of each table we visit in case - * we do not find a better matching route. - */ - dflt = srp_enter(&dsr, &at->at_default); + heap = SMR_PTR_GET(&art->art_root); + if (heap == NULL) + return (NULL); /* * Iterate until we find a leaf. */ - while (1) { + for (;;) { + unsigned int bits = art->art_levels[level]; + unsigned int p = offset + bits; + + /* + * Remember the default route of each table we visit in case + * we do not find a better matching route. + */ + ahe = SMR_PTR_GET(&heap[ART_HEAP_IDX_DEFAULT]); + if (ahe != 0) + dahe = ahe; + /* Do a single level route lookup. 
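 * art_bindex() maps the address bits covered by this stride onto
 * a fringe index; the heap entry found there is either a leaf
 * (an art_node pointer, possibly NULL) or a tagged pointer to
 * the heap one level down.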
*/ - j = art_findex(at, addr); - entry = srp_follow(nsr, &at->at_heap[j].node); + j = art_bindex(offset, bits, addr, p); + ahe = SMR_PTR_GET(&heap[j]); /* If this is a leaf (NULL is a leaf) we're done. */ - if (ISLEAF(entry)) + if (art_heap_entry_is_node(ahe)) break; - at = SUBTABLE(entry); + heap = art_heap_entry_to_heap(ahe); + offset = p; + level++; + + KASSERT(level < art->art_nlevels); + } + + an = art_heap_entry_to_node(ahe); + if (an != NULL) + return (an); + + return art_heap_entry_to_node(dahe); +} + +static int +art_node_check(const struct art_node *an, + const uint8_t *addr, unsigned int plen) +{ + const uint32_t *wan = (const uint32_t *)an->an_addr; + const uint32_t *waddr = (const uint32_t *)addr; + unsigned int i = 0; + unsigned int shift; + + if (an->an_plen != plen) + return (0); + + while (plen >= 32) { + if (wan[i] != waddr[i]) + return (0); + + i++; + plen -= 32; + } + if (plen == 0) + return (1); - ndflt = srp_enter(&ndsr, &at->at_default); - if (ndflt != NULL) { - srp_leave(&dsr); - dsr = ndsr; - dflt = ndflt; - } else - srp_leave(&ndsr); - } - - if (entry == NULL) { - srp_leave(nsr); - *nsr = dsr; - KASSERT(ISLEAF(dflt)); - return (dflt); - } - - srp_leave(&dsr); -done: - KASSERT(ISLEAF(entry)); - return (entry); + i *= 4; + while (plen >= 8) { + if (an->an_addr[i] != addr[i]) + return (0); + + i++; + plen -= 8; + } + if (plen == 0) + return (1); + + shift = 8 - plen; + + return ((an->an_addr[i] >> shift) == (addr[i] >> shift)); } /* @@ -276,24 +366,28 @@ done: * it does not exist. */ struct art_node * -art_lookup(struct art_root *ar, const void *addr, int plen, struct srp_ref *nsr) +art_lookup(struct art *art, const void *addr, unsigned int plen) { - void *entry; - struct art_table *at; - int i, j; + unsigned int offset = 0; + unsigned int bits, p; + unsigned int level = 0; + art_heap_entry *heap; + art_heap_entry ahe; + struct art_node *an; + unsigned int i, j; - KASSERT(plen >= 0 && plen <= ar->ar_alen); + KASSERT(plen <= art->art_alen); - entry = srp_enter(nsr, &ar->ar_root); - at = entry; + SMR_ASSERT_CRITICAL(); - if (at == NULL) - goto done; + heap = SMR_PTR_GET(&art->art_root); + if (heap == NULL) + return (NULL); /* Default route */ if (plen == 0) { - entry = srp_follow(nsr, &at->at_default); - goto done; + ahe = SMR_PTR_GET(&heap[ART_HEAP_IDX_DEFAULT]); + return art_heap_entry_to_node(ahe); } /* @@ -301,31 +395,47 @@ art_lookup(struct art_root *ar, const vo * the stride length at this level the entry must * be in the current table. */ - while (plen > (at->at_offset + at->at_bits)) { + for (;;) { + bits = art->art_levels[level]; + p = offset + bits; + if (plen <= p) + break; + /* Do a single level route lookup. 
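 * A leaf here (including NULL) means no subtable covers the
 * remaining bits of the prefix, so an exact match of length
 * plen cannot exist.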
*/ - j = art_findex(at, addr); - entry = srp_follow(nsr, &at->at_heap[j].node); + j = art_bindex(offset, bits, addr, p); + ahe = SMR_PTR_GET(&heap[j]); /* A leaf is a match, but not a perfect one, or NULL */ - if (ISLEAF(entry)) + if (art_heap_entry_is_node(ahe)) return (NULL); - at = SUBTABLE(entry); + heap = art_heap_entry_to_heap(ahe); + offset = p; + level++; + + KASSERT(level < art->art_nlevels); } - i = art_bindex(at, addr, plen); - if (i == -1) - return (NULL); + i = art_bindex(offset, bits, addr, plen); + ahe = SMR_PTR_GET(&heap[i]); + if (!art_heap_entry_is_node(ahe)) { + heap = art_heap_entry_to_heap(ahe); + ahe = SMR_PTR_GET(&heap[ART_HEAP_IDX_DEFAULT]); + } - entry = srp_follow(nsr, &at->at_heap[i].node); - if (!ISLEAF(entry)) - entry = srp_follow(nsr, &SUBTABLE(entry)->at_default); - -done: - KASSERT(ISLEAF(entry)); - return (entry); + /* Make sure we've got a perfect match */ + an = art_heap_entry_to_node(ahe); + if (an != NULL && art_node_check(an, addr, plen)) + return (an); + + return (NULL); } +int +art_is_empty(struct art *art) +{ + return (SMR_PTR_GET_LOCKED(&art->art_root) == NULL); +} /* * Insertion API function. @@ -334,32 +444,38 @@ done: * same destination/mask pair is already present. */ struct art_node * -art_insert(struct art_root *ar, struct art_node *an, const void *addr, int plen) +art_insert(struct art *art, struct art_node *an) { - struct art_table *at, *child; - struct art_node *node; - int i, j; - - rw_assert_wrlock(&ar->ar_lock); - KASSERT(plen >= 0 && plen <= ar->ar_alen); - - at = srp_get_locked(&ar->ar_root); - if (at == NULL) { - at = art_table_get(ar, NULL, -1); + unsigned int p; + art_heap_entry ahe, oahe, *ahep; + art_heap_entry *heap; + struct art_node *oan; + struct art_table *at; + unsigned int i, j; + + KASSERT(an->an_plen <= art->art_alen); + + heap = SMR_PTR_GET_LOCKED(&art->art_root); + if (heap == NULL) { + at = art_table_get(art, NULL, -1); if (at == NULL) return (NULL); - srp_swap_locked(&ar->ar_root, at); - } + heap = at->at_heap; + SMR_PTR_SET_LOCKED(&art->art_root, heap); + } else + at = art_heap_to_table(heap); /* Default route */ - if (plen == 0) { - node = srp_get_locked(&at->at_default); - if (node != NULL) - return (node); + if (an->an_plen == 0) { + ahep = &heap[ART_HEAP_IDX_DEFAULT]; + oahe = SMR_PTR_GET_LOCKED(ahep); + oan = art_heap_entry_to_node(oahe); + if (oan != NULL) + return (oan); - art_table_ref(ar, at); - srp_swap_locked(&at->at_default, an); + art_table_ref(art, at); + SMR_PTR_SET_LOCKED(ahep, art_node_to_heap_entry(an)); return (an); } @@ -368,10 +484,11 @@ art_insert(struct art_root *ar, struct a * the stride length at this level the entry must * be in the current table. */ - while (plen > (at->at_offset + at->at_bits)) { + while ((p = at->at_offset + at->at_bits) < an->an_plen) { /* Do a single level route lookup. */ - j = art_findex(at, addr); - node = srp_get_locked(&at->at_heap[j].node); + j = art_bindex(at->at_offset, at->at_bits, an->an_addr, p); + ahep = &heap[j]; + ahe = SMR_PTR_GET_LOCKED(ahep); /* * If the node corresponding to the fringe index is @@ -379,84 +496,86 @@ art_insert(struct art_root *ar, struct a * entry of this node will then become the default * route of the subtable. 
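 * The child table inherits the old leaf as its default entry
 * (heap[ART_HEAP_IDX_DEFAULT]) before being linked into the
 * parent heap, so SMR readers never see a half-built table.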
*/ - if (ISLEAF(node)) { - child = art_table_get(ar, at, j); + if (art_heap_entry_is_node(ahe)) { + struct art_table *child = art_table_get(art, at, j); if (child == NULL) return (NULL); - art_table_ref(ar, at); - srp_swap_locked(&at->at_heap[j].node, ASNODE(child)); + art_table_ref(art, at); + at = child; - } else - at = SUBTABLE(node); + heap = at->at_heap; + SMR_PTR_SET_LOCKED(&heap[ART_HEAP_IDX_DEFAULT], ahe); + SMR_PTR_SET_LOCKED(ahep, art_heap_to_heap_entry(heap)); + } else { + heap = art_heap_entry_to_heap(ahe); + at = art_heap_to_table(heap); + } } - i = art_bindex(at, addr, plen); - if (i == -1) - return (NULL); - - return (art_table_insert(ar, at, i, an)); -} - -/* - * Single level insertion. - */ -struct art_node * -art_table_insert(struct art_root *ar, struct art_table *at, int i, - struct art_node *an) -{ - struct art_node *prev, *node; - - node = srp_get_locked(&at->at_heap[i].node); - if (!ISLEAF(node)) - prev = srp_get_locked(&SUBTABLE(node)->at_default); - else - prev = node; - - if (art_check_duplicate(ar, prev, an)) - return (prev); + i = art_bindex(at->at_offset, at->at_bits, an->an_addr, an->an_plen); + ahep = &heap[i]; + oahe = SMR_PTR_GET_LOCKED(ahep); + if (!art_heap_entry_is_node(oahe)) { + heap = art_heap_entry_to_heap(oahe); + ahep = &heap[ART_HEAP_IDX_DEFAULT]; + oahe = SMR_PTR_GET_LOCKED(ahep); + } - art_table_ref(ar, at); + /* Check if there's an existing node */ + oan = art_heap_entry_to_node(oahe); + if (oan != NULL && art_node_check(oan, an->an_addr, an->an_plen)) + return (oan); /* * If the index `i' of the route that we are inserting is not * a fringe index, we need to allot this new route pointer to * all the corresponding fringe indices. */ + art_table_ref(art, at); + ahe = art_node_to_heap_entry(an); if (i < at->at_minfringe) - art_allot(at, i, prev, an); - else if (!ISLEAF(node)) - srp_swap_locked(&SUBTABLE(node)->at_default, an); + art_allot(at, i, oahe, ahe); else - srp_swap_locked(&at->at_heap[i].node, an); + SMR_PTR_SET_LOCKED(ahep, ahe); return (an); } - /* * Deletion API function. */ struct art_node * -art_delete(struct art_root *ar, struct art_node *an, const void *addr, int plen) +art_delete(struct art *art, const void *addr, unsigned int plen) { + unsigned int p; + art_heap_entry *heap; struct art_table *at; - struct art_node *node; - int i, j; + art_heap_entry ahe, pahe, *ahep; + struct art_node *an; + unsigned int i, j; - rw_assert_wrlock(&ar->ar_lock); - KASSERT(plen >= 0 && plen <= ar->ar_alen); + KASSERT(plen <= art->art_alen); - at = srp_get_locked(&ar->ar_root); - if (at == NULL) + heap = SMR_PTR_GET_LOCKED(&art->art_root); + if (heap == NULL) return (NULL); + at = art_heap_to_table(heap); + /* Default route */ if (plen == 0) { - node = srp_get_locked(&at->at_default); - srp_swap_locked(&at->at_default, NULL); - art_table_free(ar, at); - return (node); + ahep = &heap[ART_HEAP_IDX_DEFAULT]; + + ahe = SMR_PTR_GET_LOCKED(ahep); + an = art_heap_entry_to_node(ahe); + if (an == NULL) + return (NULL); + + SMR_PTR_SET_LOCKED(ahep, 0); + art_table_free(art, at); + + return (an); } /* @@ -464,53 +583,37 @@ art_delete(struct art_root *ar, struct a * the stride length at this level the entry must * be in the current table. */ - while (plen > (at->at_offset + at->at_bits)) { + while ((p = at->at_offset + at->at_bits) < plen) { /* Do a single level route lookup. 
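 * Deletion runs serialised with insertion, so this walk is free
 * to follow the art_table structures rather than the
 * offset/level bookkeeping used by the SMR lookup paths.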
*/ - j = art_findex(at, addr); - node = srp_get_locked(&at->at_heap[j].node); + j = art_bindex(at->at_offset, at->at_bits, addr, p); + ahe = SMR_PTR_GET_LOCKED(&heap[j]); /* If this is a leaf, there is no route to delete. */ - if (ISLEAF(node)) + if (art_heap_entry_is_node(ahe)) return (NULL); - at = SUBTABLE(node); + heap = art_heap_entry_to_heap(ahe); + at = art_heap_to_table(heap); } - i = art_bindex(at, addr, plen); - if (i == -1) + i = art_bindex(at->at_offset, at->at_bits, addr, plen); + ahep = &heap[i]; + ahe = SMR_PTR_GET_LOCKED(ahep); + if (!art_heap_entry_is_node(ahe)) { + art_heap_entry *nheap = art_heap_entry_to_heap(ahe); + ahep = &nheap[ART_HEAP_IDX_DEFAULT]; + ahe = SMR_PTR_GET_LOCKED(ahep); + } + + an = art_heap_entry_to_node(ahe); + if (an == NULL) { + /* No route to delete */ return (NULL); - - return (art_table_delete(ar, at, i, an)); -} - -/* - * Single level deletion. - */ -struct art_node * -art_table_delete(struct art_root *ar, struct art_table *at, int i, - struct art_node *an) -{ - struct art_node *next, *node; - -#ifdef DIAGNOSTIC - struct art_node *prev; -#endif - - node = srp_get_locked(&at->at_heap[i].node); -#ifdef DIAGNOSTIC - if (!ISLEAF(node)) - prev = srp_get_locked(&SUBTABLE(node)->at_default); - else - prev = node; - - KASSERT(prev == an); -#endif + } /* Get the next most specific route for the index `i'. */ - if ((i >> 1) > 1) - next = srp_get_locked(&at->at_heap[i >> 1].node); - else - next = NULL; + j = i >> 1; + pahe = (j > 1) ? SMR_PTR_GET_LOCKED(&heap[j]) : 0; /* * If the index `i' of the route that we are removing is not @@ -518,20 +621,18 @@ art_table_delete(struct art_root *ar, st * route pointer to all the corresponding fringe indices. */ if (i < at->at_minfringe) - art_allot(at, i, an, next); - else if (!ISLEAF(node)) - srp_swap_locked(&SUBTABLE(node)->at_default, next); + art_allot(at, i, ahe, pahe); else - srp_swap_locked(&at->at_heap[i].node, next); + SMR_PTR_SET_LOCKED(ahep, pahe); /* We have removed an entry from this table. */ - art_table_free(ar, at); + art_table_free(art, at); return (an); } struct art_table * -art_table_ref(struct art_root *ar, struct art_table *at) +art_table_ref(struct art *art, struct art_table *at) { at->at_refcnt++; return (at); @@ -542,12 +643,11 @@ art_table_rele(struct art_table *at) { if (at == NULL) return (0); - return (--at->at_refcnt == 0); } int -art_table_free(struct art_root *ar, struct art_table *at) +art_table_free(struct art *art, struct art_table *at) { if (art_table_rele(at)) { /* @@ -555,7 +655,7 @@ art_table_free(struct art_root *ar, stru * that are empty. */ do { - at = art_table_put(ar, at); + at = art_table_put(art, at); } while (art_table_rele(at)); return (1); @@ -567,116 +667,187 @@ art_table_free(struct art_root *ar, stru /* * Iteration API function. */ -int -art_walk(struct art_root *ar, int (*f)(struct art_node *, void *), void *arg) + +static struct art_node * +art_iter_descend(struct art_iter *ai, art_heap_entry *heap, + art_heap_entry pahe) { - struct srp_ref sr; - struct art_table *at; - struct art_node *node; - int error = 0; + struct art_table *at; + art_heap_entry ahe; - rw_enter_write(&ar->ar_lock); - at = srp_get_locked(&ar->ar_root); - if (at != NULL) { - art_table_ref(ar, at); + at = art_heap_to_table(heap); + ai->ai_table = art_table_ref(ai->ai_art, at); - /* - * The default route should be processed here because the root - * table does not have a parent. 
- */ - node = srp_enter(&sr, &at->at_default); - error = art_walk_apply(ar, node, NULL, f, arg); - srp_leave(&sr); + /* + * Start looking at non-fringe nodes. + */ + ai->ai_j = 1; - if (error == 0) - error = art_table_walk(ar, at, f, arg); + /* + * The default route (index 1) is processed by the + * parent table (where it belongs) otherwise it could + * be processed more than once. + */ + ai->ai_i = 2; + + /* + * Process the default route now. + */ + ahe = SMR_PTR_GET_LOCKED(&heap[ART_HEAP_IDX_DEFAULT]); + if (ahe != 0 && ahe != pahe) + return (art_heap_entry_to_node(ahe)); - art_table_free(ar, at); + /* + * Tell the caller to proceed with art_iter_next. + */ + return (NULL); +} + +struct art_node * +art_iter_open(struct art *art, struct art_iter *ai) +{ + art_heap_entry *heap = SMR_PTR_GET(&art->art_root); + struct art_node *an; + + ai->ai_art = art; + + if (heap == NULL) { + /* empty, we're already done */ + ai->ai_table = NULL; + return (NULL); } - rw_exit_write(&ar->ar_lock); - return (error); + an = art_iter_descend(ai, heap, 0); + if (an != NULL) + return (an); /* default route */ + + return (art_iter_next(ai)); } -int -art_table_walk(struct art_root *ar, struct art_table *at, - int (*f)(struct art_node *, void *), void *arg) +struct art_node * +art_iter_next(struct art_iter *ai) { - struct srp_ref sr; - struct art_node *node, *next; - struct art_table *nat; - int i, j, error = 0; - uint32_t maxfringe = (at->at_minfringe << 1); + struct art_table *at = ai->ai_table; + art_heap_entry *heap = at->at_heap; + art_heap_entry ahe, pahe; + unsigned int i; +descend: /* * Iterate non-fringe nodes in ``natural'' order. */ - for (j = 1; j < at->at_minfringe; j += 2) { - /* - * The default route (index 1) is processed by the - * parent table (where it belongs) otherwise it could - * be processed more than once. - */ - for (i = max(j, 2); i < at->at_minfringe; i <<= 1) { - next = srp_get_locked(&at->at_heap[i >> 1].node); - - node = srp_enter(&sr, &at->at_heap[i].node); - error = art_walk_apply(ar, node, next, f, arg); - srp_leave(&sr); + if (ai->ai_j < at->at_minfringe) { + for (;;) { + while ((i = ai->ai_i) < at->at_minfringe) { + ai->ai_i = i << 1; + + pahe = SMR_PTR_GET_LOCKED(&heap[i >> 1]); + ahe = SMR_PTR_GET_LOCKED(&heap[i]); + if (ahe != 0 && ahe != pahe) + return (art_heap_entry_to_node(ahe)); + } - if (error != 0) - return (error); + ai->ai_j += 2; + if (ai->ai_j < at->at_minfringe) + ai->ai_i = ai->ai_j; + else { + /* Set up the fringe loop */ + ai->ai_i = at->at_minfringe; + break; + } } } /* - * Iterate fringe nodes. + * Descendent tables are only found on fringe nodes, and + * they record where they were placed in their parent. This + * allows the iterator to know where to resume traversal when + * it ascends back to the parent table. By keeping the table + * refs when going down the tree, the topology is preserved + * even if all the nodes are removed. */ - for (i = at->at_minfringe; i < maxfringe; i++) { - next = srp_get_locked(&at->at_heap[i >> 1].node); - node = srp_enter(&sr, &at->at_heap[i].node); - if (!ISLEAF(node)) { - nat = art_table_ref(ar, SUBTABLE(node)); - node = srp_follow(&sr, &nat->at_default); - } else - nat = NULL; + for (;;) { + unsigned int maxfringe = at->at_minfringe << 1; + struct art_table *parent; - error = art_walk_apply(ar, node, next, f, arg); - srp_leave(&sr); + /* + * Iterate fringe nodes. 
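 * Each fringe entry is either a leaf, reported to the caller
 * unless it duplicates its parent entry, or a child table that
 * the iterator descends into.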
+ */ + while ((i = ai->ai_i) < maxfringe) { + ai->ai_i = i + 1; - if (error != 0) { - art_table_free(ar, nat); - return (error); + pahe = SMR_PTR_GET_LOCKED(&heap[i >> 1]); + ahe = SMR_PTR_GET_LOCKED(&heap[i]); + if (art_heap_entry_is_node(ahe)) { + if (ahe != 0 && ahe != pahe) + return (art_heap_entry_to_node(ahe)); + } else { + struct art_node *an; + + heap = art_heap_entry_to_heap(ahe); + + an = art_iter_descend(ai, heap, pahe); + if (an != NULL) /* default route? */ + return (an); + + /* Start looping over the child table */ + at = art_heap_to_table(heap); + goto descend; + } } - if (nat != NULL) { - error = art_table_walk(ar, nat, f, arg); - art_table_free(ar, nat); - if (error != 0) - return (error); + /* + * Ascend back up to the parent + */ + parent = at->at_parent; + ai->ai_i = at->at_index + 1; + art_table_free(ai->ai_art, at); + + ai->ai_table = parent; + if (parent == NULL) { + /* The root table has no parent */ + break; } + + at = parent; + ai->ai_j = at->at_minfringe; + heap = at->at_heap; } - return (0); + return (NULL); } -int -art_walk_apply(struct art_root *ar, - struct art_node *an, struct art_node *next, - int (*f)(struct art_node *, void *), void *arg) +void +art_iter_close(struct art_iter *ai) { - int error = 0; + struct art_table *at, *parent; - if ((an != NULL) && (an != next)) { - rw_exit_write(&ar->ar_lock); - error = (*f)(an, arg); - rw_enter_write(&ar->ar_lock); + for (at = ai->ai_table; at != NULL; at = parent) { + parent = at->at_parent; + art_table_free(ai->ai_art, at); } - return (error); + ai->ai_table = NULL; } +int +art_walk(struct art *art, int (*f)(struct art_node *, void *), void *arg) +{ + struct art_iter ai; + struct art_node *an; + int error = 0; + + ART_FOREACH(an, art, &ai) { + error = f(an, arg); + if (error != 0) { + art_iter_close(&ai); + return (error); + } + } + + return (0); +} /* * Create a table and use the given index to set its default route. @@ -684,89 +855,90 @@ art_walk_apply(struct art_root *ar, * Note: This function does not modify the root or the parent. */ struct art_table * -art_table_get(struct art_root *ar, struct art_table *parent, int j) +art_table_get(struct art *art, struct art_table *parent, unsigned int j) { struct art_table *at; - struct art_node *node; - void *at_heap; - uint32_t lvl; + art_heap_entry *heap; + unsigned int level; + unsigned int bits; - KASSERT(j != 0 && j != 1); + KASSERT(j != ART_HEAP_IDX_TABLE && j != ART_HEAP_IDX_DEFAULT); KASSERT(parent != NULL || j == -1); - if (parent != NULL) - lvl = parent->at_level + 1; - else - lvl = 0; - - KASSERT(lvl < ar->ar_nlvl); + level = (parent != NULL) ? 
parent->at_level + 1 : 0; + KASSERT(level < art->art_nlevels); at = pool_get(&at_pool, PR_NOWAIT|PR_ZERO); if (at == NULL) return (NULL); - switch (AT_HEAPSIZE(ar->ar_bits[lvl])) { - case AT_HEAPSIZE(4): - at_heap = pool_get(&at_heap_4_pool, PR_NOWAIT|PR_ZERO); + bits = art->art_levels[level]; + switch (bits) { + case 4: + heap = pool_get(&at_heap_4_pool, PR_NOWAIT|PR_ZERO); break; - case AT_HEAPSIZE(8): - at_heap = pool_get(&at_heap_8_pool, PR_NOWAIT|PR_ZERO); + case 8: + heap = pool_get(&at_heap_8_pool, PR_NOWAIT|PR_ZERO); break; default: - panic("incorrect stride length %u", ar->ar_bits[lvl]); + panic("incorrect stride length %u", bits); } - if (at_heap == NULL) { + if (heap == NULL) { pool_put(&at_pool, at); return (NULL); } + SMR_PTR_SET_LOCKED(&heap[ART_HEAP_IDX_TABLE], (art_heap_entry)at); + at->at_parent = parent; at->at_index = j; - at->at_minfringe = (1 << ar->ar_bits[lvl]); - at->at_level = lvl; - at->at_bits = ar->ar_bits[lvl]; - at->at_heap = at_heap; + at->at_minfringe = (1 << bits); + at->at_level = level; + at->at_bits = bits; + at->at_heap = heap; at->at_refcnt = 0; if (parent != NULL) { - node = srp_get_locked(&parent->at_heap[j].node); - /* node isn't being deleted, no srp_finalize needed */ - srp_swap_locked(&at->at_default, node); + art_heap_entry ahe; + at->at_offset = (parent->at_offset + parent->at_bits); + + ahe = SMR_PTR_GET_LOCKED(&parent->at_heap[j]); + SMR_PTR_SET_LOCKED(&heap[ART_HEAP_IDX_DEFAULT], ahe); } return (at); } - /* * Delete a table and use its index to restore its parent's default route. * * Note: Modify its parent to unlink the table from it. */ struct art_table * -art_table_put(struct art_root *ar, struct art_table *at) +art_table_put(struct art *art, struct art_table *at) { struct art_table *parent = at->at_parent; - struct art_node *node; - uint32_t j = at->at_index; + unsigned int j = at->at_index; KASSERT(at->at_refcnt == 0); KASSERT(j != 0 && j != 1); if (parent != NULL) { + art_heap_entry ahe; + KASSERT(j != -1); KASSERT(at->at_level == parent->at_level + 1); KASSERT(parent->at_refcnt >= 1); /* Give the route back to its parent. */ - node = srp_get_locked(&at->at_default); - srp_swap_locked(&parent->at_heap[j].node, node); + ahe = SMR_PTR_GET_LOCKED(&at->at_heap[ART_HEAP_IDX_DEFAULT]); + SMR_PTR_SET_LOCKED(&parent->at_heap[j], ahe); } else { KASSERT(j == -1); KASSERT(at->at_level == 0); - srp_swap_locked(&ar->ar_root, NULL); + SMR_PTR_SET_LOCKED(&art->art_root, NULL); } mtx_enter(&art_table_gc_mtx); @@ -789,19 +961,16 @@ art_table_gc(void *null) art_table_gc_list = NULL; mtx_leave(&art_table_gc_mtx); + smr_barrier(); + while (at != NULL) { next = at->at_parent; - if (at->at_level == 0) - srp_finalize(at, "arttfini"); - else - srp_finalize(ASNODE(at), "arttfini"); - - switch (AT_HEAPSIZE(at->at_bits)) { - case AT_HEAPSIZE(4): + switch (at->at_bits) { + case 4: pool_put(&at_heap_4_pool, at->at_heap); break; - case AT_HEAPSIZE(8): + case 8: pool_put(&at_heap_8_pool, at->at_heap); break; default: @@ -837,40 +1006,45 @@ art_table_gc(void *null) * art_allot(at, k, old, new); * } */ -void -art_allot(struct art_table *at, int i, struct art_node *old, - struct art_node *new) -{ - struct art_node *node, *dflt; - int k = i; +static void +art_allot(struct art_table *at, unsigned int i, + art_heap_entry oahe, art_heap_entry nahe) +{ + art_heap_entry ahe, *ahep; + art_heap_entry *heap; + unsigned int k = i; KASSERT(i < at->at_minfringe); + heap = at->at_heap; + again: k <<= 1; if (k < at->at_minfringe) goto nonfringe; /* Change fringe nodes. 
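 * Only fringe entries still carrying the old value (directly,
 * or via a child table's default entry) are switched to the new
 * one; more specific routes are left untouched.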
*/ - while (1) { - node = srp_get_locked(&at->at_heap[k].node); - if (!ISLEAF(node)) { - dflt = srp_get_locked(&SUBTABLE(node)->at_default); - if (dflt == old) { - srp_swap_locked(&SUBTABLE(node)->at_default, - new); - } - } else if (node == old) { - srp_swap_locked(&at->at_heap[k].node, new); + for (;;) { + ahep = &heap[k]; + ahe = SMR_PTR_GET_LOCKED(ahep); + + if (!art_heap_entry_is_node(ahe)) { + art_heap_entry *child = art_heap_entry_to_heap(ahe); + ahep = &child[ART_HEAP_IDX_DEFAULT]; + ahe = SMR_PTR_GET_LOCKED(ahep); } + + if (ahe == oahe) + SMR_PTR_SET_LOCKED(ahep, nahe); + if (k % 2) goto moveup; k++; } nonfringe: - node = srp_get_locked(&at->at_heap[k].node); - if (node == old) + ahe = SMR_PTR_GET_LOCKED(&heap[k]); + if (ahe == oahe) goto again; moveon: if (k % 2) @@ -879,7 +1053,7 @@ moveon: goto nonfringe; moveup: k >>= 1; - srp_swap_locked(&at->at_heap[k].node, new); + SMR_PTR_SET_LOCKED(&heap[k], nahe); /* Change non-fringe node. */ if (k != i) @@ -887,7 +1061,7 @@ moveup: } struct art_node * -art_get(uint8_t plen) +art_get(const uint8_t *addr, unsigned int plen) { struct art_node *an; @@ -895,17 +1069,25 @@ art_get(uint8_t plen) if (an == NULL) return (NULL); - an->an_plen = plen; - SRPL_INIT(&an->an_rtlist); + art_node_init(an, addr, plen); return (an); } void -art_put(struct art_node *an) +art_node_init(struct art_node *an, const uint8_t *addr, unsigned int plen) { - KASSERT(SRPL_EMPTY_LOCKED(&an->an_rtlist)); + size_t len; + + len = roundup(plen, 8) / 8; + KASSERT(len <= sizeof(an->an_addr)); + memcpy(an->an_addr, addr, len); + an->an_plen = plen; +} +void +art_put(struct art_node *an) +{ mtx_enter(&art_node_gc_mtx); an->an_gc = art_node_gc_list; art_node_gc_list = an; @@ -917,17 +1099,17 @@ art_put(struct art_node *an) void art_gc(void *null) { - struct art_node *an, *next; + struct art_node *an; mtx_enter(&art_node_gc_mtx); an = art_node_gc_list; art_node_gc_list = NULL; mtx_leave(&art_node_gc_mtx); - while (an != NULL) { - next = an->an_gc; + smr_barrier(); - srp_finalize(an, "artnfini"); + while (an != NULL) { + struct art_node *next = an->an_gc; pool_put(&an_pool, an); Index: sys/net/art.h =================================================================== RCS file: /cvs/src/sys/net/art.h,v diff -u -p -r1.27 art.h --- sys/net/art.h 7 Jul 2025 07:09:05 -0000 1.27 +++ sys/net/art.h 7 Jul 2025 09:26:05 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: art.h,v 1.27 2025/07/07 07:09:05 dlg Exp $ */ +/* $OpenBSD: art.h,v 1.25 2023/11/11 12:17:50 bluhm Exp $ */ /* * Copyright (c) 2015 Martin Pieuchot @@ -19,92 +19,151 @@ #ifndef _NET_ART_H_ #define _NET_ART_H_ -#include -#include +/* + * Allotment Routing Table (ART) + * + * Yoichi Hariguchi paper can be found at: + * http://www.hariguchi.org/art/art.pdf + * + * Locking: + * + * Modification (ie, art_insert or art_delete) and iteration + * (art_iter_next, etc) over the ART must be serialised by the caller. + * Lookups (ie, art_match and art_lookup) run within an SMR critical + * section. + * + * Iteration requires serialisation as it manipulates the reference + * counts on tables as it traverses the tree. The iterator maintains + * these references until it runs out of entries. This allows code + * iterating over the ART to release locks in between calls to + * art_iter_open and art_iter_next. The references may be dropped + * early with art_iter_close. 
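+ *
+ * For example, a caller holding an rwlock over the ART can
+ * release it between iterator calls (a sketch only; the lock
+ * and its scope are the caller's own, not part of this API):
+ *
+ *	rw_enter_write(&lock);
+ *	an = art_iter_open(art, &ai);
+ *	while (an != NULL) {
+ *		rw_exit_write(&lock);
+ *		... use an->an_value ...
+ *		rw_enter_write(&lock);
+ *		an = art_iter_next(&ai);
+ *	}
+ *	rw_exit_write(&lock);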
+ * + * Note, the iterator does not hold a reference to the art_node + * structure or the data hanging off the an_value pointer, they must + * be accounted for separately or their use must be serialised with + * art_delete. + */ -#define ART_MAXLVL 32 /* We currently use 32 levels for IPv6. */ +typedef uintptr_t art_heap_entry; /* - * Root of the ART tables, equivalent to the radix head. - * - * Locks used to protect struct members in this file: - * I immutable after creation - * l root's `ar_lock' - * K kernel lock - * For SRP related structures that allow lock-free reads, the write lock - * is indicated below. + * Root of the ART, equivalent to the radix head. */ -struct art_root { - struct srp ar_root; /* [l] First table */ - struct rwlock ar_lock; /* [] Serialise modifications */ - uint8_t ar_bits[ART_MAXLVL]; /* [I] Per level stride */ - uint8_t ar_nlvl; /* [I] Number of levels */ - uint8_t ar_alen; /* [I] Address length in bits */ -}; -#define ISLEAF(e) (((unsigned long)(e) & 1) == 0) -#define SUBTABLE(e) ((struct art_table *)((unsigned long)(e) & ~1)) -#define ASNODE(t) ((struct art_node *)((unsigned long)(t) | 1)) +struct art { + art_heap_entry *art_root; + const unsigned int *art_levels; + unsigned int art_nlevels; + unsigned int art_alen; +}; /* * Allotment Table. */ struct art_table { + art_heap_entry *at_heap; struct art_table *at_parent; /* Parent table */ - uint32_t at_index; /* Index in the parent table */ - uint32_t at_minfringe; /* Index that fringe begins */ - uint32_t at_level; /* Level of the table */ - uint8_t at_bits; /* Stride length of the table */ - uint8_t at_offset; /* Sum of parents' stride len */ - - /* - * Items stored in the heap are pointers to nodes, in the leaf - * case, or tables otherwise. One exception is index 0 which - * is a route counter. - */ - union { - struct srp node; - unsigned long count; - } *at_heap; /* Array of 2^(slen+1) items */ + + unsigned int at_index; /* Index in the parent table */ + unsigned int at_minfringe; /* Index that fringe begins */ + + unsigned int at_level; /* Level of the table */ + unsigned int at_bits; /* Stride length of the table */ + unsigned int at_offset; /* Sum of parents' stride len */ + + unsigned int at_refcnt; }; -#define at_refcnt at_heap[0].count/* Refcounter (1 per different route) */ -#define at_default at_heap[1].node /* Default route (was in parent heap) */ -/* Heap size for an ART table of stride length ``slen''. */ -#define AT_HEAPSIZE(slen) ((1 << ((slen) + 1)) * sizeof(void *)) +#define ART_HEAP_IDX_TABLE 0 +#define ART_HEAP_IDX_DEFAULT 1 -/* - * Forward declaration needed for the list of mpath routes - * attached to a single ART node. - */ -struct rtentry; +#define AT_HEAPSIZE(bits) ((1 << ((bits) + 1)) * sizeof(art_heap_entry)) /* * A node is the internal representation of a route entry. 
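 * Each node stores its own copy of the address (an_addr) and
 * prefix length (an_plen), so a lookup can verify an exact
 * match without dereferencing the an_value user data.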
*/ struct art_node { + void *an_value; union { - SRPL_HEAD(, rtentry) an__rtlist; /* Route related to this node */ - struct art_node *an__gc; /* Entry on GC list */ - } an_pointer; - uint8_t an_plen; /* Prefix length */ + struct art_node *an__gc; + uint8_t an__addr[16]; + } an__u; +#define an_gc an__u.an__gc +#define an_addr an__u.an__addr + unsigned int an_plen; }; -#define an_rtlist an_pointer.an__rtlist -#define an_gc an_pointer.an__gc -void art_init(void); -struct art_root *art_alloc(unsigned int, unsigned int); -struct art_node *art_insert(struct art_root *, struct art_node *, const void *, - int); -struct art_node *art_delete(struct art_root *, struct art_node *, const void *, - int); -struct art_node *art_match(struct art_root *, const void *, struct srp_ref *); -struct art_node *art_lookup(struct art_root *, const void *, int, - struct srp_ref *); -int art_walk(struct art_root *, +static inline struct art_table * +art_heap_to_table(art_heap_entry *heap) +{ + return ((struct art_table *)heap[ART_HEAP_IDX_TABLE]); +} + +static inline int +art_heap_entry_is_node(art_heap_entry ahe) +{ + return ((ahe & 1UL) == 0); +} + +static inline struct art_node * +art_heap_entry_to_node(art_heap_entry ahe) +{ + return ((struct art_node *)ahe); +} + +static inline art_heap_entry * +art_heap_entry_to_heap(art_heap_entry ahe) +{ + return ((art_heap_entry *)(ahe & ~1UL)); +} + +static inline art_heap_entry +art_node_to_heap_entry(struct art_node *an) +{ + return ((art_heap_entry)an); +} + +static inline art_heap_entry +art_heap_to_heap_entry(art_heap_entry *heap) +{ + return ((art_heap_entry)heap | 1UL); +} + +#ifdef _KERNEL +void art_boot(void); +struct art *art_alloc(unsigned int); +void art_init(struct art *, unsigned int); +struct art_node *art_insert(struct art *, struct art_node *); +struct art_node *art_delete(struct art *, const void *, unsigned int); +struct art_node *art_match(struct art *, const void *); +struct art_node *art_lookup(struct art *, const void *, unsigned int); +int art_is_empty(struct art *); + +struct art_node *art_get(const uint8_t *, unsigned int); +void art_node_init(struct art_node *, + const uint8_t *, unsigned int); +void art_put(struct art_node *); + +struct art_iter { + struct art *ai_art; + struct art_table *ai_table; + unsigned int ai_j; + unsigned int ai_i; +}; + +struct art_node *art_iter_open(struct art *, struct art_iter *); +struct art_node *art_iter_next(struct art_iter *); +void art_iter_close(struct art_iter *); + +#define ART_FOREACH(_an, _art, _ai) \ + for ((_an) = art_iter_open((_art), (_ai)); \ + (_an) != NULL; \ + (_an) = art_iter_next((_ai))) + +int art_walk(struct art *, int (*)(struct art_node *, void *), void *); -struct art_node *art_get(uint8_t); -void art_put(struct art_node *); +#endif /* _KERNEL */ #endif /* _NET_ART_H_ */ Index: sys/net/if_wg.c =================================================================== RCS file: /cvs/src/sys/net/if_wg.c,v diff -u -p -r1.44 if_wg.c --- sys/net/if_wg.c 7 Jul 2025 07:09:05 -0000 1.44 +++ sys/net/if_wg.c 7 Jul 2025 09:26:05 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: if_wg.c,v 1.44 2025/07/07 07:09:05 dlg Exp $ */ +/* $OpenBSD: if_wg.c,v 1.42 2025/04/17 12:42:50 tb Exp $ */ /* * Copyright (C) 2015-2020 Jason A. Donenfeld . All Rights Reserved. 
@@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -41,6 +42,7 @@ #include #include +#include #include #include @@ -250,10 +252,11 @@ struct wg_softc { struct socket *sc_so6; #endif + struct rwlock sc_aip_lock; size_t sc_aip_num; - struct art_root *sc_aip4; + struct art *sc_aip4; #ifdef INET6 - struct art_root *sc_aip6; + struct art *sc_aip6; #endif struct rwlock sc_peer_lock; @@ -289,7 +292,7 @@ void wg_peer_counters_add(struct wg_peer int wg_aip_add(struct wg_softc *, struct wg_peer *, struct wg_aip_io *); struct wg_peer * - wg_aip_lookup(struct art_root *, void *); + wg_aip_lookup(struct art *, void *); int wg_aip_remove(struct wg_softc *, struct wg_peer *, struct wg_aip_io *); @@ -608,7 +611,7 @@ wg_peer_counters_add(struct wg_peer *pee int wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d) { - struct art_root *root; + struct art *root; struct art_node *node; struct wg_aip *aip; int ret = 0; @@ -624,8 +627,10 @@ wg_aip_add(struct wg_softc *sc, struct w if ((aip = pool_get(&wg_aip_pool, PR_NOWAIT|PR_ZERO)) == NULL) return ENOBUFS; - rw_enter_write(&root->ar_lock); - node = art_insert(root, &aip->a_node, &d->a_addr, d->a_cidr); + art_node_init(&aip->a_node, &d->a_addr.addr_bytes, d->a_cidr); + + rw_enter_write(&sc->sc_aip_lock); + node = art_insert(root, &aip->a_node); if (node == &aip->a_node) { aip->a_peer = peer; @@ -641,18 +646,18 @@ wg_aip_add(struct wg_softc *sc, struct w aip->a_peer = peer; } } - rw_exit_write(&root->ar_lock); + rw_exit_write(&sc->sc_aip_lock); return ret; } struct wg_peer * -wg_aip_lookup(struct art_root *root, void *addr) +wg_aip_lookup(struct art *root, void *addr) { - struct srp_ref sr; struct art_node *node; - node = art_match(root, addr, &sr); - srp_leave(&sr); + smr_read_enter(); + node = art_match(root, addr); + smr_read_leave(); return node == NULL ? 
NULL : ((struct wg_aip *) node)->a_peer; } @@ -660,8 +665,7 @@ wg_aip_lookup(struct art_root *root, voi int wg_aip_remove(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d) { - struct srp_ref sr; - struct art_root *root; + struct art *root; struct art_node *node; struct wg_aip *aip; int ret = 0; @@ -674,23 +678,24 @@ wg_aip_remove(struct wg_softc *sc, struc default: return EAFNOSUPPORT; } - rw_enter_write(&root->ar_lock); - if ((node = art_lookup(root, &d->a_addr, d->a_cidr, &sr)) == NULL) { + rw_enter_write(&sc->sc_aip_lock); + smr_read_enter(); + node = art_lookup(root, &d->a_addr, d->a_cidr); + smr_read_leave(); + if (node == NULL) { ret = ENOENT; } else if (((struct wg_aip *) node)->a_peer != peer) { ret = EXDEV; } else { aip = (struct wg_aip *)node; - if (art_delete(root, node, &d->a_addr, d->a_cidr) == NULL) + if (art_delete(root, &d->a_addr, d->a_cidr) == NULL) panic("art_delete failed to delete node %p", node); sc->sc_aip_num--; LIST_REMOVE(aip, a_entry); pool_put(&wg_aip_pool, aip); } - - srp_leave(&sr); - rw_exit_write(&root->ar_lock); + rw_exit_write(&sc->sc_aip_lock); return ret; } @@ -2725,10 +2730,11 @@ wg_clone_create(struct if_clone *ifc, in #endif sc->sc_aip_num = 0; - if ((sc->sc_aip4 = art_alloc(0, 32)) == NULL) + rw_init(&sc->sc_aip_lock, "wgaip"); + if ((sc->sc_aip4 = art_alloc(32)) == NULL) goto ret_02; #ifdef INET6 - if ((sc->sc_aip6 = art_alloc(0, 128)) == NULL) + if ((sc->sc_aip6 = art_alloc(128)) == NULL) goto ret_03; #endif Index: sys/net/if_wg.h =================================================================== RCS file: /cvs/src/sys/net/if_wg.h,v diff -u -p -r1.6 if_wg.h --- sys/net/if_wg.h 13 Oct 2024 00:53:21 -0000 1.6 +++ sys/net/if_wg.h 7 Jul 2025 09:26:05 -0000 @@ -46,6 +46,7 @@ struct wg_aip_io { sa_family_t a_af; int a_cidr; union wg_aip_addr { + uint8_t addr_bytes; struct in_addr addr_ipv4; struct in6_addr addr_ipv6; } a_addr; Index: sys/net/route.h =================================================================== RCS file: /cvs/src/sys/net/route.h,v diff -u -p -r1.216 route.h --- sys/net/route.h 16 Mar 2025 21:58:08 -0000 1.216 +++ sys/net/route.h 7 Jul 2025 09:26:05 -0000 @@ -40,7 +40,7 @@ * I immutable after creation * N net lock * X exclusive net lock, or shared net lock + kernel lock - * R art (rtable) lock + * R rtable lock * r per route entry mutex rt_mtx * L arp/nd6/etc lock for updates, net lock for reads * T rttimer_mtx route timer lists @@ -117,7 +117,7 @@ struct rttimer; struct rtentry { struct sockaddr *rt_dest; /* [I] destination */ - SRPL_ENTRY(rtentry) rt_next; /* [R] next mpath entry to our dst */ + struct rtentry *rt_next; /* [R] next mpath entry to our dst */ struct sockaddr *rt_gateway; /* [X] gateway address */ struct ifaddr *rt_ifa; /* [N] interface addr to use */ caddr_t rt_llinfo; /* [L] pointer to link level info or Index: sys/net/rtable.c =================================================================== RCS file: /cvs/src/sys/net/rtable.c,v diff -u -p -r1.91 rtable.c --- sys/net/rtable.c 7 Jul 2025 07:09:05 -0000 1.91 +++ sys/net/rtable.c 7 Jul 2025 09:26:05 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: rtable.c,v 1.91 2025/07/07 07:09:05 dlg Exp $ */ +/* $OpenBSD: rtable.c,v 1.87 2024/04/09 12:53:08 claudio Exp $ */ /* * Copyright (c) 2014-2016 Martin Pieuchot @@ -23,8 +23,10 @@ #include #include #include +#include #include #include +#include #endif #include @@ -56,7 +58,7 @@ uint8_t af2idx_max; /* Array of routing table pointers. 
*/ struct rtmap { unsigned int limit; - struct rtable **tbl; + void **tbl; }; /* @@ -294,7 +296,7 @@ rtable_empty(unsigned int rtableid) tbl = rtable_get(rtableid, dp->dom_family); if (tbl == NULL) continue; - if (tbl->r_art->ar_root.ref != NULL) + if (!art_is_empty(tbl->r_art)) return (0); } @@ -352,18 +354,12 @@ rtable_l2set(unsigned int rtableid, unsi static inline const uint8_t *satoaddr(struct rtable *, const struct sockaddr *); -int an_match(struct art_node *, const struct sockaddr *, int); -void rtentry_ref(void *, void *); -void rtentry_unref(void *, void *); - void rtable_mpath_insert(struct art_node *, struct rtentry *); -struct srpl_rc rt_rc = SRPL_RC_INITIALIZER(rtentry_ref, rtentry_unref, NULL); - void rtable_init_backend(void) { - art_init(); + art_boot(); } struct rtable * @@ -375,12 +371,13 @@ rtable_alloc(unsigned int rtableid, unsi if (tbl == NULL) return (NULL); - tbl->r_art = art_alloc(rtableid, alen); + tbl->r_art = art_alloc(alen); if (tbl->r_art == NULL) { free(tbl, M_RTABLE, sizeof(*tbl)); return (NULL); } + rw_init(&tbl->r_lock, "rtable"); tbl->r_off = off; tbl->r_source = NULL; @@ -435,38 +432,33 @@ rtable_lookup(unsigned int rtableid, con const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio) { struct rtable *tbl; - struct art_root *ar; struct art_node *an; struct rtentry *rt = NULL; - struct srp_ref sr, nsr; const uint8_t *addr; int plen; tbl = rtable_get(rtableid, dst->sa_family); if (tbl == NULL) return (NULL); - ar = tbl->r_art; addr = satoaddr(tbl, dst); - /* No need for a perfect match. */ + smr_read_enter(); if (mask == NULL) { - an = art_match(ar, addr, &nsr); - if (an == NULL) - goto out; + /* No need for a perfect match. */ + an = art_match(tbl->r_art, addr); } else { plen = rtable_satoplen(dst->sa_family, mask); if (plen == -1) - return (NULL); - - an = art_lookup(ar, addr, plen, &nsr); - - /* Make sure we've got a perfect match. */ - if (!an_match(an, dst, plen)) goto out; + + an = art_lookup(tbl->r_art, addr, plen); } + if (an == NULL) + goto out; - SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) { + for (rt = SMR_PTR_GET(&an->an_value); rt != NULL; + rt = SMR_PTR_GET(&rt->rt_next)) { if (prio != RTP_ANY && (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK)) continue; @@ -481,9 +473,8 @@ rtable_lookup(unsigned int rtableid, con if (rt != NULL) rtref(rt); - SRPL_LEAVE(&sr); out: - srp_leave(&nsr); + smr_read_leave(); return (rt); } @@ -492,45 +483,40 @@ struct rtentry * rtable_match(unsigned int rtableid, const struct sockaddr *dst, uint32_t *src) { struct rtable *tbl; - struct art_root *ar; struct art_node *an; struct rtentry *rt = NULL; - struct srp_ref sr, nsr; const uint8_t *addr; int hash; + uint8_t prio; tbl = rtable_get(rtableid, dst->sa_family); if (tbl == NULL) return (NULL); - ar = tbl->r_art; addr = satoaddr(tbl, dst); - an = art_match(ar, addr, &nsr); + smr_read_enter(); + an = art_match(tbl->r_art, addr); if (an == NULL) goto out; - rt = SRPL_FIRST(&sr, &an->an_rtlist); - if (rt == NULL) { - SRPL_LEAVE(&sr); - goto out; - } - rtref(rt); - SRPL_LEAVE(&sr); + rt = SMR_PTR_GET(&an->an_value); + KASSERT(rt != NULL); + prio = rt->rt_priority; /* Gateway selection by Hash-Threshold (RFC 2992) */ if ((hash = rt_hash(rt, dst, src)) != -1) { struct rtentry *mrt; - int threshold, npaths = 0; + int threshold, npaths = 1; KASSERT(hash <= 0xffff); - SRPL_FOREACH(mrt, &sr, &an->an_rtlist, rt_next) { - /* Only count nexthops with the same priority. 
*/ - if (mrt->rt_priority == rt->rt_priority) + /* Only count nexthops with the same priority. */ + mrt = rt; + while ((mrt = SMR_PTR_GET(&mrt->rt_next)) != NULL) { + if (mrt->rt_priority == prio) npaths++; } - SRPL_LEAVE(&sr); threshold = (0xffff / npaths) + 1; @@ -539,27 +525,22 @@ rtable_match(unsigned int rtableid, cons * route list attached to the node, so we won't necessarily * have the same number of routes. for most modifications, * we'll pick a route that we wouldn't have if we only saw the - * list before or after the change. if we were going to use - * the last available route, but it got removed, we'll hit - * the end of the list and then pick the first route. + * list before or after the change. */ - - mrt = SRPL_FIRST(&sr, &an->an_rtlist); - while (hash > threshold && mrt != NULL) { - if (mrt->rt_priority == rt->rt_priority) + mrt = rt; + while (hash > threshold) { + if (mrt->rt_priority == prio) { + rt = mrt; hash -= threshold; - mrt = SRPL_FOLLOW(&sr, mrt, rt_next); - } - - if (mrt != NULL) { - rtref(mrt); - rtfree(rt); - rt = mrt; + } + mrt = SMR_PTR_GET(&mrt->rt_next); + if (mrt == NULL) + break; } - SRPL_LEAVE(&sr); } + rtref(rt); out: - srp_leave(&nsr); + smr_read_leave(); return (rt); } @@ -569,9 +550,6 @@ rtable_insert(unsigned int rtableid, str struct rtentry *rt) { struct rtable *tbl; - struct rtentry *mrt; - struct srp_ref sr; - struct art_root *ar; struct art_node *an, *prev; const uint8_t *addr; int plen; @@ -581,24 +559,49 @@ rtable_insert(unsigned int rtableid, str tbl = rtable_get(rtableid, dst->sa_family); if (tbl == NULL) return (EAFNOSUPPORT); - ar = tbl->r_art; addr = satoaddr(tbl, dst); plen = rtable_satoplen(dst->sa_family, mask); if (plen == -1) return (EINVAL); - rtref(rt); /* guarantee rtfree won't do anything during insert */ - rw_enter_write(&ar->ar_lock); + an = art_get(addr, plen); + if (an == NULL) + return (ENOMEM); - /* Do not permit exactly the same dst/mask/gw pair. */ - an = art_lookup(ar, addr, plen, &sr); - srp_leave(&sr); /* an can't go away while we have the lock */ - if (an_match(an, dst, plen)) { - struct rtentry *mrt; - int mpathok = ISSET(rt->rt_flags, RTF_MPATH); + /* prepare for immediate operation if insert succeeds */ + rt_flags = rt->rt_flags; + rt->rt_flags &= ~RTF_MPATH; + rt->rt_dest = dst; + rt->rt_plen = plen; + rt->rt_next = NULL; + + rtref(rt); /* take a ref for the table */ + an->an_value = rt; + + rw_enter_write(&tbl->r_lock); + prev = art_insert(tbl->r_art, an); + if (prev == NULL) { + error = ENOMEM; + goto put; + } + + if (prev != an) { + struct rtentry *mrt; + int mpathok = ISSET(rt_flags, RTF_MPATH); + int mpath = 0; + + /* + * An ART node with the same destination/netmask already + * exists. + */ + art_put(an); + an = prev; - SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) { + /* Do not permit exactly the same dst/mask/gw pair. 
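+ * An existing route with the same priority and gateway makes
+ * the insert fail with EEXIST; a different gateway marks both
+ * routes as multipath (RTF_MPATH) instead.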
*/ + for (mrt = SMR_PTR_GET_LOCKED(&an->an_value); + mrt != NULL; + mrt = SMR_PTR_GET_LOCKED(&mrt->rt_next)) { if (prio != RTP_ANY && (mrt->rt_priority & RTP_MASK) != (prio & RTP_MASK)) continue; @@ -610,63 +613,34 @@ rtable_insert(unsigned int rtableid, str error = EEXIST; goto leave; } + mpath = RTF_MPATH; } - } - - an = art_get(plen); - if (an == NULL) { - error = ENOBUFS; - goto leave; - } - - /* prepare for immediate operation if insert succeeds */ - rt_flags = rt->rt_flags; - rt->rt_flags &= ~RTF_MPATH; - rt->rt_dest = dst; - rt->rt_plen = plen; - SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next); - prev = art_insert(ar, an, addr, plen); - if (prev != an) { - SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, - rt_next); - rt->rt_flags = rt_flags; - art_put(an); - - if (prev == NULL) { - error = ESRCH; - goto leave; - } - - an = prev; - - mrt = SRPL_FIRST_LOCKED(&an->an_rtlist); - KASSERT(mrt != NULL); - KASSERT((rt->rt_flags & RTF_MPATH) || mrt->rt_priority != prio); + /* The new route can be added to the list. */ + if (mpath) { + SET(rt->rt_flags, RTF_MPATH); + + for (mrt = SMR_PTR_GET_LOCKED(&an->an_value); + mrt != NULL; + mrt = SMR_PTR_GET_LOCKED(&mrt->rt_next)) { + if ((mrt->rt_priority & RTP_MASK) != + (prio & RTP_MASK)) + continue; - /* - * An ART node with the same destination/netmask already - * exists, MPATH conflict must have been already checked. - */ - if (rt->rt_flags & RTF_MPATH) { - /* - * Only keep the RTF_MPATH flag if two routes have - * the same gateway. - */ - rt->rt_flags &= ~RTF_MPATH; - SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) { - if (mrt->rt_priority == prio) { - mrt->rt_flags |= RTF_MPATH; - rt->rt_flags |= RTF_MPATH; - } + SET(mrt->rt_flags, RTF_MPATH); } } /* Put newly inserted entry at the right place. */ rtable_mpath_insert(an, rt); } + rw_exit_write(&tbl->r_lock); + return (error); + +put: + art_put(an); leave: - rw_exit_write(&ar->ar_lock); + rw_exit_write(&tbl->r_lock); rtfree(rt); return (error); } @@ -676,99 +650,76 @@ rtable_delete(unsigned int rtableid, con const struct sockaddr *mask, struct rtentry *rt) { struct rtable *tbl; - struct art_root *ar; struct art_node *an; - struct srp_ref sr; const uint8_t *addr; int plen; struct rtentry *mrt; - int npaths = 0; - int error = 0; tbl = rtable_get(rtableid, dst->sa_family); if (tbl == NULL) return (EAFNOSUPPORT); - ar = tbl->r_art; addr = satoaddr(tbl, dst); plen = rtable_satoplen(dst->sa_family, mask); if (plen == -1) return (EINVAL); - rtref(rt); /* guarantee rtfree won't do anything under ar_lock */ - rw_enter_write(&ar->ar_lock); - an = art_lookup(ar, addr, plen, &sr); - srp_leave(&sr); /* an can't go away while we have the lock */ - - /* Make sure we've got a perfect match. */ - if (!an_match(an, dst, plen)) { - error = ESRCH; - goto leave; + rw_enter_write(&tbl->r_lock); + smr_read_enter(); + an = art_lookup(tbl->r_art, addr, plen); + smr_read_leave(); + if (an == NULL) { + rw_exit_write(&tbl->r_lock); + return (ESRCH); } - /* - * If other multipath route entries are still attached to - * this ART node we only have to unlink it. 
- */ - SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) - npaths++; - - if (npaths > 1) { - KASSERT(refcnt_read(&rt->rt_refcnt) >= 1); - SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, - rt_next); - - mrt = SRPL_FIRST_LOCKED(&an->an_rtlist); - if (npaths == 2) - mrt->rt_flags &= ~RTF_MPATH; + /* If this is the only route in the list then we can delete the node */ + if (SMR_PTR_GET_LOCKED(&an->an_value) == rt && + SMR_PTR_GET_LOCKED(&rt->rt_next) == NULL) { + struct art_node *oan; + oan = art_delete(tbl->r_art, addr, plen); + if (oan != an) + panic("art %p changed shape during delete", tbl->r_art); + art_put(an); + /* + * XXX an and the rt ref could still be alive on other cpus. + * this currently works because of the NET_LOCK/KERNEL_LOCK + * but should be fixed if we want to do route lookups outside + * these locks. - dlg@ + */ + } else { + struct rtentry **prt; + struct rtentry *nrt; + unsigned int found = 0; + unsigned int npaths = 0; - goto leave; + /* + * If other multipath route entries are still attached to + * this ART node we only have to unlink it. + */ + prt = (struct rtentry **)&an->an_value; + while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) { + if (mrt == rt) { + found = 1; + SMR_PTR_SET_LOCKED(prt, + SMR_PTR_GET_LOCKED(&mrt->rt_next)); + } else if ((mrt->rt_priority & RTP_MASK) == + (rt->rt_priority & RTP_MASK)) { + npaths++; + nrt = mrt; + } + prt = &mrt->rt_next; + } + if (!found) + panic("removing non-existent route"); + if (npaths == 1) + CLR(nrt->rt_flags, RTF_MPATH); } - - if (art_delete(ar, an, addr, plen) == NULL) - panic("art_delete failed to find node %p", an); - KASSERT(refcnt_read(&rt->rt_refcnt) >= 1); - SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next); - art_put(an); - -leave: - rw_exit_write(&ar->ar_lock); + rw_exit_write(&tbl->r_lock); rtfree(rt); - return (error); -} - -struct rtable_walk_cookie { - int (*rwc_func)(struct rtentry *, void *, unsigned int); - void *rwc_arg; - struct rtentry **rwc_prt; - unsigned int rwc_rid; -}; - -/* - * Helper for rtable_walk to keep the ART code free from any "struct rtentry". - */ -int -rtable_walk_helper(struct art_node *an, void *xrwc) -{ - struct srp_ref sr; - struct rtable_walk_cookie *rwc = xrwc; - struct rtentry *rt; - int error = 0; - - SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) { - error = (*rwc->rwc_func)(rt, rwc->rwc_arg, rwc->rwc_rid); - if (error != 0) - break; - } - if (rwc->rwc_prt != NULL && rt != NULL) { - rtref(rt); - *rwc->rwc_prt = rt; - } - SRPL_LEAVE(&sr); - - return (error); + return (0); } int @@ -776,21 +727,58 @@ rtable_walk(unsigned int rtableid, sa_fa int (*func)(struct rtentry *, void *, unsigned int), void *arg) { struct rtable *tbl; - struct art_root *ar; - struct rtable_walk_cookie rwc; + struct art_iter ai; + struct art_node *an; int error; tbl = rtable_get(rtableid, af); if (tbl == NULL) return (EAFNOSUPPORT); - ar = tbl->r_art; - rwc.rwc_func = func; - rwc.rwc_arg = arg; - rwc.rwc_prt = prt; - rwc.rwc_rid = rtableid; + rw_enter_write(&tbl->r_lock); + ART_FOREACH(an, tbl->r_art, &ai) { + /* + * ART nodes have a list of rtentries. + * + * art_iter holds references to the topology + * so it won't change, but not the an_node or rtentries. + */ + struct rtentry *rt = SMR_PTR_GET_LOCKED(&an->an_value); + rtref(rt); + + rw_exit_write(&tbl->r_lock); + do { + struct rtentry *nrt; + + smr_read_enter(); + /* Get ready for the next entry. 
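+ * Take the reference on the next entry inside the SMR read
+ * section, so it stays valid while func() runs without the
+ * table lock held.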
@@ -799,12 +787,12 @@ struct rtentry *
 rtable_iterate(struct rtentry *rt0)
 {
 	struct rtentry *rt = NULL;
-	struct srp_ref sr;
 
-	rt = SRPL_NEXT(&sr, rt0, rt_next);
+	smr_read_enter();
+	rt = SMR_PTR_GET(&rt0->rt_next);
 	if (rt != NULL)
 		rtref(rt);
-	SRPL_LEAVE(&sr);
+	smr_read_leave();
 	rtfree(rt0);
 	return (rt);
 }
@@ -820,28 +808,24 @@ rtable_mpath_reprio(unsigned int rtablei
     int plen, uint8_t prio, struct rtentry *rt)
 {
 	struct rtable		*tbl;
-	struct art_root		*ar;
 	struct art_node		*an;
-	struct srp_ref		 sr;
 	const uint8_t		*addr;
 	int			 error = 0;
 
 	tbl = rtable_get(rtableid, dst->sa_family);
 	if (tbl == NULL)
 		return (EAFNOSUPPORT);
 
-	ar = tbl->r_art;
 	addr = satoaddr(tbl, dst);
 
-	rw_enter_write(&ar->ar_lock);
-	an = art_lookup(ar, addr, plen, &sr);
-	srp_leave(&sr); /* an can't go away while we have the lock */
-
-	/* Make sure we've got a perfect match. */
-	if (!an_match(an, dst, plen)) {
+	rw_enter_write(&tbl->r_lock);
+	smr_read_enter();
+	an = art_lookup(tbl->r_art, addr, plen);
+	smr_read_leave();
+	if (an == NULL) {
 		error = ESRCH;
-	} else if (SRPL_FIRST_LOCKED(&an->an_rtlist) == rt &&
-	    SRPL_NEXT_LOCKED(rt, rt_next) == NULL) {
+	} else if (SMR_PTR_GET_LOCKED(&an->an_value) == rt &&
+	    SMR_PTR_GET_LOCKED(&rt->rt_next) == NULL) {
 		/*
 		 * If there's only one entry on the list do not go
 		 * through an insert/remove cycle. This is done to
@@ -850,15 +834,23 @@ rtable_mpath_reprio(unsigned int rtablei
 		 */
 		rt->rt_priority = prio;
 	} else {
-		rtref(rt); /* keep rt alive in between remove and insert */
-		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist,
-		    rt, rtentry, rt_next);
+		struct rtentry **prt;
+		struct rtentry *mrt;
+
+		prt = (struct rtentry **)&an->an_value;
+		while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
+			if (mrt == rt)
+				break;
+			prt = &mrt->rt_next;
+		}
+		KASSERT(mrt != NULL);
+
+		SMR_PTR_SET_LOCKED(prt, SMR_PTR_GET_LOCKED(&rt->rt_next));
 		rt->rt_priority = prio;
 		rtable_mpath_insert(an, rt);
-		rtfree(rt);
 		error = EAGAIN;
 	}
-	rw_exit_write(&ar->ar_lock);
+	rw_exit_write(&tbl->r_lock);
 
 	return (error);
 }
@@ -866,63 +858,21 @@ rtable_mpath_reprio(unsigned int rtablei
 void
 rtable_mpath_insert(struct art_node *an, struct rtentry *rt)
 {
-	struct rtentry *mrt, *prt = NULL;
+	struct rtentry *mrt, **prt;
 	uint8_t prio = rt->rt_priority;
 
-	if ((mrt = SRPL_FIRST_LOCKED(&an->an_rtlist)) == NULL) {
-		SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
-		return;
-	}
-
 	/* Iterate until we find the route to be placed after ``rt''. */
-	while (mrt->rt_priority <= prio && SRPL_NEXT_LOCKED(mrt, rt_next)) {
-		prt = mrt;
-		mrt = SRPL_NEXT_LOCKED(mrt, rt_next);
-	}
-
-	if (mrt->rt_priority <= prio) {
-		SRPL_INSERT_AFTER_LOCKED(&rt_rc, mrt, rt, rt_next);
-	} else if (prt != NULL) {
-		SRPL_INSERT_AFTER_LOCKED(&rt_rc, prt, rt, rt_next);
-	} else {
-		SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
-	}
-}
-
-/*
- * Returns 1 if ``an'' perfectly matches (``dst'', ``plen''), 0 otherwise.
- */
-int
-an_match(struct art_node *an, const struct sockaddr *dst, int plen)
-{
-	struct rtentry *rt;
-	struct srp_ref sr;
-	int match;
-
-	if (an == NULL || an->an_plen != plen)
-		return (0);
-
-	rt = SRPL_FIRST(&sr, &an->an_rtlist);
-	match = (rt != NULL && memcmp(rt->rt_dest, dst, dst->sa_len) == 0);
-	SRPL_LEAVE(&sr);
-
-	return (match);
-}
-
-void
-rtentry_ref(void *null, void *xrt)
-{
-	struct rtentry *rt = xrt;
-
-	rtref(rt);
-}
+	prt = (struct rtentry **)&an->an_value;
+	while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
+		if (mrt->rt_priority >= prio)
+			break;
 
-void
-rtentry_unref(void *null, void *xrt)
-{
-	struct rtentry *rt = xrt;
+		prt = &mrt->rt_next;
+	}
 
-	rtfree(rt);
+	SMR_PTR_SET_LOCKED(&rt->rt_next, mrt);
+	SMR_PTR_SET_LOCKED(prt, rt);
 }
 
 /*
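The ordered insert in rtable_mpath_insert() above relies on store order: the new entry's rt_next is published before the entry itself is linked in, so an SMR reader can never follow a half-initialised node. The same shape in a stand-alone sketch, again with plain stores modeling SMR_PTR_SET_LOCKED() under the write lock:

	#include <assert.h>
	#include <stddef.h>

	struct entry {
		struct entry *next;
		int prio;
	};

	static void
	insert_sorted(struct entry **head, struct entry *en)
	{
		struct entry **prt = head;
		struct entry *e;

		while ((e = *prt) != NULL) {
			if (e->prio >= en->prio)
				break;
			prt = &e->next;
		}

		/* order matters: hook up en->next before making en visible */
		en->next = e;
		*prt = en;
	}

	int
	main(void)
	{
		struct entry a = { NULL, 8 }, b = { NULL, 32 }, m = { NULL, 16 };
		struct entry *head = NULL;

		insert_sorted(&head, &a);
		insert_sorted(&head, &b);
		insert_sorted(&head, &m);
		assert(head == &a && a.next == &m && m.next == &b);
		return (0);
	}

Because entries are kept sorted by rt_priority, the head of the list is always the best route for the node, which is why rtable_mpath_reprio() can take the remove-and-reinsert path when priorities change.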
Index: sys/net/rtable.h
===================================================================
RCS file: /cvs/src/sys/net/rtable.h,v
diff -u -p -r1.33 rtable.h
--- sys/net/rtable.h	7 Jul 2025 07:09:05 -0000	1.33
+++ sys/net/rtable.h	7 Jul 2025 09:26:05 -0000
@@ -1,4 +1,4 @@
-/*	$OpenBSD: rtable.h,v 1.33 2025/07/07 07:09:05 dlg Exp $	*/
+/*	$OpenBSD: rtable.h,v 1.30 2024/05/13 01:15:53 jsg Exp $	*/
 
 /*
  * Copyright (c) 2014-2016 Martin Pieuchot
@@ -19,7 +19,7 @@
 #ifndef _NET_RTABLE_H_
 #define _NET_RTABLE_H_
 
-struct art_root;
+struct art;
 
 /*
  * Locks used to protect struct members in this file:
@@ -28,7 +28,8 @@ struct art_root;
  */
 
 struct rtable {
-	struct art_root		*r_art;		/* [I] */
+	struct rwlock		 r_lock;
+	struct art		*r_art;		/* [I] */
 	unsigned int		 r_off;		/* [I] Offset of key in bytes */
 	struct sockaddr		*r_source;	/* [N] use optional src addr */
Index: usr.bin/netstat/route.c
===================================================================
RCS file: /cvs/src/usr.bin/netstat/route.c,v
diff -u -p -r1.111 route.c
--- usr.bin/netstat/route.c	7 Jul 2025 06:33:40 -0000	1.111
+++ usr.bin/netstat/route.c	7 Jul 2025 09:26:05 -0000
@@ -1,4 +1,4 @@
-/*	$OpenBSD: route.c,v 1.111 2025/07/07 06:33:40 dlg Exp $	*/
+/*	$OpenBSD: route.c,v 1.110 2023/11/14 10:31:22 claudio Exp $	*/
 /*	$NetBSD: route.c,v 1.15 1996/05/07 02:55:06 thorpej Exp $	*/
 
 /*
@@ -33,14 +33,12 @@
 #include
 #include
 #include
+#include
+#include
+#include
 #include
 #include
-#define _KERNEL
-#include
-#include
-#include
-#undef _KERNEL
 #include
 #include
 #include
@@ -52,10 +50,16 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 
+#define _KERNEL
+#include
+#include
+#undef _KERNEL
+
 #include "netstat.h"
 
 static union {
@@ -68,8 +72,8 @@
 struct rtentry rtentry;
 
 static struct sockaddr *kgetsa(struct sockaddr *);
 static struct sockaddr *plentosa(sa_family_t, int, struct sockaddr *);
-static struct art_node *getdefault(struct art_table *);
-static void p_table(struct art_table *);
+static struct art_node *getdefault(art_heap_entry *);
+static void p_table(art_heap_entry *);
 static void p_artnode(struct art_node *);
 static void p_krtentry(struct rtentry *);
 
@@ -80,7 +84,7 @@ void
 routepr(u_long afmap, u_long af2idx, u_long af2idx_max, u_int tableid)
 {
 	struct rtable tbl;
-	struct art_root ar;
+	struct art art;
 	struct art_node *node;
 	struct srp *afm_head, *afm;
 	struct {
@@ -129,19 +133,19 @@ routepr(u_long afmap, u_long af2idx, u_l
 
 		free(tblmap);
 
-		kread((u_long)tbl.r_art, &ar, sizeof(ar));
+		kread((u_long)tbl.r_art, &art, sizeof(art));
 
-		if (ar.ar_root.ref == NULL)
+		if (art.art_root == NULL)
 			continue;
 
 		pr_family(i);
 		pr_rthdr(i, Aflag);
 
-		node = getdefault(ar.ar_root.ref);
+		node = getdefault(art.art_root);
 		if (node != NULL)
 			p_artnode(node);
 
-		p_table(ar.ar_root.ref);
+		p_table(art.art_root);
 	}
 
 	free(afm);
@@ -200,64 +204,52 @@ plentosa(sa_family_t af, int plen, struc
 }
 
 static struct art_node *
-getdefault(struct art_table *at)
+getdefault(art_heap_entry *heap)
 {
-	struct art_node *node;
-	struct art_table table;
-	union {
-		struct srp node;
-		unsigned long count;
-	} *heap;
-
-	kread((u_long)at, &table, sizeof(table));
-	heap = calloc(1, AT_HEAPSIZE(table.at_bits));
-	kread((u_long)table.at_heap, heap, AT_HEAPSIZE(table.at_bits));
-
-	node = heap[1].node.ref;
-
-	free(heap);
-
-	return (node);
+	art_heap_entry entry;
+
+	kread((u_long)(heap + ART_HEAP_IDX_DEFAULT), &entry, sizeof(entry));
+
+	return (art_heap_entry_to_node(entry));
 }
 
 static void
-p_table(struct art_table *at)
+p_table(art_heap_entry *kheap)
 {
+	art_heap_entry entry, *heap, *nheap;
 	struct art_node *next, *node;
-	struct art_table *nat, table;
-	union {
-		struct srp node;
-		unsigned long count;
-	} *heap;
+	struct art_table table;
 	int i, j;
 
-	kread((u_long)at, &table, sizeof(table));
+	kread((u_long)(kheap + ART_HEAP_IDX_TABLE), &entry, sizeof(entry));
+	kread((u_long)entry, &table, sizeof(table));
 	heap = calloc(1, AT_HEAPSIZE(table.at_bits));
-	kread((u_long)table.at_heap, heap, AT_HEAPSIZE(table.at_bits));
+	kread((u_long)kheap, heap, AT_HEAPSIZE(table.at_bits));
 
 	for (j = 1; j < table.at_minfringe; j += 2) {
 		for (i = (j > 2) ? j : 2; i < table.at_minfringe; i <<= 1) {
-			next = heap[i >> 1].node.ref;
-			node = heap[i].node.ref;
+			next = art_heap_entry_to_node(heap[i >> 1]);
+			node = art_heap_entry_to_node(heap[i]);
 			if (node != NULL && node != next)
 				p_artnode(node);
 		}
 	}
 
 	for (i = table.at_minfringe; i < table.at_minfringe << 1; i++) {
-		next = heap[i >> 1].node.ref;
-		node = heap[i].node.ref;
-		if (!ISLEAF(node)) {
-			nat = SUBTABLE(node);
-			node = getdefault(nat);
-		} else
-			nat = NULL;
+		next = art_heap_entry_to_node(heap[i >> 1]);
+
+		entry = heap[i];
+		if (art_heap_entry_is_node(entry)) {
+			nheap = NULL;
+			node = art_heap_entry_to_node(entry);
+		} else {
+			nheap = art_heap_entry_to_heap(entry);
+			node = getdefault(nheap);
+		}
 
 		if (node != NULL && node != next)
 			p_artnode(node);
 
-		if (nat != NULL)
-			p_table(nat);
+		if (nheap != NULL)
+			p_table(nheap);
 	}
 
 	free(heap);
@@ -270,14 +262,14 @@ p_artnode(struct art_node *an)
 	struct rtentry *rt;
 
 	kread((u_long)an, &node, sizeof(node));
-	rt = node.an_rtlist.sl_head.ref;
+	rt = node.an_value;
 
 	while (rt != NULL) {
 		kread((u_long)rt, &rtentry, sizeof(rtentry));
 		if (Aflag)
 			printf("%-16p ", rt);
 		p_krtentry(&rtentry);
-		rt = rtentry.rt_next.se_next.ref;
+		rt = rtentry.rt_next;
 	}
 }
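p_table() above prints a node only where it differs from its parent slot (index i >> 1): ART allots one route across every fringe entry it covers, so comparing against the parent collapses the duplicates down to a single line of output per prefix. A toy model of that test, using plain ints for heap slots instead of the kernel's tagged art_heap_entry values (heap contents and MINFRINGE are made up for illustration):

	#include <stdio.h>

	#define MINFRINGE 4	/* fringe entries occupy slots 4..7 */

	int
	main(void)
	{
		/* slot 0 unused; parents in 1..3, fringe in 4..7; 0 == empty */
		int heap[8] = { 0, 7, 7, 0, 7, 5, 9, 0 };
		int i;

		for (i = MINFRINGE; i < (MINFRINGE << 1); i++) {
			int parent = heap[i >> 1];
			int node = heap[i];

			/* print only where the allotment differs from the parent */
			if (node != 0 && node != parent)
				printf("slot %d: route %d\n", i, node);
		}
		return (0);
	}

Here slot 4 inherits route 7 from its parent and is skipped, while slots 5 and 6 carry more specific routes and are printed, which is the same de-duplication the netstat walk performs over the kvm-read heap.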
Index: regress/sys/net/rtable/kern_compat.h
===================================================================
RCS file: /cvs/src/regress/sys/net/rtable/kern_compat.h,v
diff -u -p -r1.16 kern_compat.h
--- regress/sys/net/rtable/kern_compat.h	11 Nov 2023 07:34:54 -0000	1.16
+++ regress/sys/net/rtable/kern_compat.h	7 Jul 2025 09:26:05 -0000
@@ -23,10 +23,14 @@
 #include
 #include
 #include
+#include
 #include
 
+#include "smr_compat.h"
 #include "srp_compat.h"
 
+#define membar_producer() __sync_synchronize()
+
 #define _KERNEL
 #define DIAGNOSTIC
 #define INET
@@ -65,6 +69,8 @@ struct pool {
 #define nitems(_a) (sizeof((_a)) / sizeof((_a)[0]))
 #endif
 
+#define roundup(x, y)	((((x)+((y)-1))/(y))*(y))
+
 #ifndef IPL_NONE
 #define IPL_NONE 0
 #endif
@@ -89,5 +95,16 @@ extern struct domain *domains[];
 
 struct rtentry;
 int rt_hash(struct rtentry *, const struct sockaddr *, uint32_t *);
+
+#define READ_ONCE(x) ({						\
+	typeof(x) __tmp = *(volatile typeof(x) *)&(x);		\
+	__tmp;							\
+})
+
+#define WRITE_ONCE(x, val) ({					\
+	typeof(x) __tmp = (val);				\
+	*(volatile typeof(x) *)&(x) = __tmp;			\
+	__tmp;							\
+})
 
 #endif /* _KERN_COMPAT_H_ */
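The READ_ONCE()/WRITE_ONCE() compat macros above force exactly one untorn load or store through a volatile cast, which is the property the SMR pointer macros need when this code runs in the userland regress harness. A small stand-alone example of their use (shared_gen is a hypothetical shared counter; the macros are repeated here so the example compiles on its own):

	#include <stdio.h>

	#define READ_ONCE(x) ({						\
		typeof(x) __tmp = *(volatile typeof(x) *)&(x);		\
		__tmp;							\
	})

	#define WRITE_ONCE(x, val) ({					\
		typeof(x) __tmp = (val);				\
		*(volatile typeof(x) *)&(x) = __tmp;			\
		__tmp;							\
	})

	/* hypothetical generation counter shared with another thread */
	static unsigned int shared_gen;

	int
	main(void)
	{
		/* exactly one store; the compiler cannot tear or elide it */
		WRITE_ONCE(shared_gen, READ_ONCE(shared_gen) + 1);

		/* exactly one load, even if this sat inside a polling loop */
		printf("gen %u\n", READ_ONCE(shared_gen));
		return (0);
	}

Note these macros only constrain the compiler; any required CPU ordering still comes from the membar_producer() definition added above.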
Index: regress/sys/net/rtable/smr_compat.h
===================================================================
RCS file: regress/sys/net/rtable/smr_compat.h
diff -N regress/sys/net/rtable/smr_compat.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ regress/sys/net/rtable/smr_compat.h	7 Jul 2025 09:26:05 -0000
@@ -0,0 +1,14 @@
+
+#ifndef _SMR_COMPAT_H_
+#define _SMR_COMPAT_H_
+
+#include
+
+void smr_read_enter(void);
+void smr_read_leave(void);
+
+void SMR_ASSERT_CRITICAL(void);
+
+#define smr_barrier() do { } while (0)
+
+#endif /* _SMR_COMPAT_H_ */
Index: regress/sys/net/rtable/util.c
===================================================================
RCS file: /cvs/src/regress/sys/net/rtable/util.c,v
diff -u -p -r1.16 util.c
--- regress/sys/net/rtable/util.c	14 Feb 2025 06:25:00 -0000	1.16
+++ regress/sys/net/rtable/util.c	7 Jul 2025 09:26:05 -0000
@@ -1,4 +1,3 @@
-/*	$OpenBSD: util.c,v 1.16 2025/02/14 06:25:00 anton Exp $	*/
 /*
  * Copyright (c) 2015 Martin Pieuchot
@@ -52,6 +51,7 @@
 #undef _KERNEL
 
 #include "srp_compat.h"
+#include "smr_compat.h"
 
 #include
 #include
@@ -262,14 +262,16 @@ rtentry_delete(struct rtentry *rt, void
 	struct sockaddr_in6 sa_mask;
 	struct sockaddr *mask = rt_plen2mask(rt, &sa_mask);
 	int error;
+	unsigned int refs;
 
 	assert(rt_plen(rt) == rtable_satoplen(af, mask));
-
+	refs = refcnt_read(&rt->rt_refcnt);
+	assert(refs > 0);
 	if ((error = rtable_delete(0, rt_key(rt), mask, rt)) != 0) {
 		inet_net_satop(af, rt_key(rt), rt_plen(rt), dest, sizeof(dest));
 		errx(1, "can't rm route: %s, %s", dest, strerror(error));
 	}
-	assert(refcnt_read(&rt->rt_refcnt) == 0);
+	assert(refcnt_read(&rt->rt_refcnt) == refs - 1);
 
 	return (0);
 }
@@ -576,4 +578,28 @@ rt_hash(struct rtentry *rt, const struct
 void
 rt_timer_init(void)
 {
+}
+
+/*
+ * SMR stuff
+ */
+
+unsigned int smr_crit;
+
+void
+smr_read_enter(void)
+{
+	smr_crit++;
+}
+
+void
+smr_read_leave(void)
+{
+	smr_crit--;
+}
+
+void
+SMR_ASSERT_CRITICAL(void)
+{
+	assert(smr_crit > 0);
 }
Index: regress/sys/net/rtable/delete/main.c
===================================================================
RCS file: /cvs/src/regress/sys/net/rtable/delete/main.c,v
diff -u -p -r1.8 main.c
--- regress/sys/net/rtable/delete/main.c	7 Jul 2025 06:33:40 -0000	1.8
+++ regress/sys/net/rtable/delete/main.c	7 Jul 2025 09:26:05 -0000
@@ -1,4 +1,4 @@
-/*	$OpenBSD: main.c,v 1.8 2025/07/07 06:33:40 dlg Exp $	*/
+/*	$OpenBSD: main.c,v 1.7 2021/04/13 08:21:12 claudio Exp $	*/
 
 /*
  * Copyright (c) 2015 Martin Pieuchot
@@ -19,8 +19,11 @@
 #include "srp_compat.h"
 
 #include
+#include
 #include
 #include
+
+#include
 
 #include
 #include
@@ -61,10 +64,10 @@ main(int argc, char *argv[])
 	struct rtable *tbl;
 	tbl = rtable_get(0, AF_INET6);
 	assert(tbl != NULL);
-	struct art_root *ar;
-	ar = tbl->r_art;
-	assert(ar != NULL);
-	assert(ar->ar_root.ref == NULL);
+	struct art *art;
+	art = tbl->r_art;
+	assert(art != NULL);
+	assert(art->art_root == NULL);
 
 	return (0);
 }
Index: regress/sys/net/rtable/fullfeed/main.c
===================================================================
RCS file: /cvs/src/regress/sys/net/rtable/fullfeed/main.c,v
diff -u -p -r1.5 main.c
--- regress/sys/net/rtable/fullfeed/main.c	13 Apr 2021 08:21:12 -0000	1.5
+++ regress/sys/net/rtable/fullfeed/main.c	7 Jul 2025 09:26:05 -0000
@@ -19,6 +19,7 @@
 #include "srp_compat.h"
 
 #include
+#include
 #include
 #include