Index: tcp_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.325
diff -u -p -r1.325 tcp_input.c
--- tcp_input.c	20 Jul 2016 09:15:28 -0000	1.325
+++ tcp_input.c	26 Jul 2016 07:09:15 -0000
@@ -3264,63 +3264,58 @@ tcp_mss_adv(struct mbuf *m, int af)
  * TCP compressed state engine.  Currently used to hold compressed
  * state for SYN_RECEIVED.
  */
+RB_HEAD(syn_cache_tree, syn_cache);
+TAILQ_HEAD(syn_cache_list, syn_cache);
+
+static inline int
+syn_cache_cmp(struct syn_cache *a, struct syn_cache *b)
+{
+	int rv;
+
+	if (a->sc_rtableid > b->sc_rtableid)
+		return (-1);
+	if (a->sc_rtableid < b->sc_rtableid)
+		return (1);
+
+	rv = memcmp(&a->sc_src.sa, &b->sc_src.sa, a->sc_src.sa.sa_len);
+	if (rv != 0)
+		return (rv);
+
+	rv = memcmp(&a->sc_dst.sa, &b->sc_dst.sa, a->sc_dst.sa.sa_len);
+
+	return (rv);
+}
+
+RB_PROTOTYPE(syn_cache_tree, syn_cache, sc_tentry, syn_cache_cmp);
+
+struct syn_cache *
+	syn_cache_lookup(struct sockaddr *, struct sockaddr *, u_int);
+struct syn_cache *
+	syn_cache_insert(struct syn_cache *, struct tcpcb *);
 
 /* syn hash parameters */
-int	tcp_syn_hash_size = TCP_SYN_HASH_SIZE;
 int	tcp_syn_cache_limit = TCP_SYN_HASH_SIZE*TCP_SYN_BUCKET_SIZE;
-int	tcp_syn_bucket_limit = 3*TCP_SYN_BUCKET_SIZE;
-int	tcp_syn_use_limit = 100000;
-
-struct syn_cache_set tcp_syn_cache[2];
-int tcp_syn_cache_active;
 
-#define SYN_HASH(sa, sp, dp, rand) \
-	(((sa)->s_addr ^ (rand)[0]) * \
-	(((((u_int32_t)(dp))<<16) + ((u_int32_t)(sp))) ^ (rand)[4]))
-#ifndef INET6
-#define SYN_HASHALL(hash, src, dst, rand) \
-do { \
-	hash = SYN_HASH(&satosin(src)->sin_addr, \
-		satosin(src)->sin_port, \
-		satosin(dst)->sin_port, (rand)); \
-} while (/*CONSTCOND*/ 0)
-#else
-#define SYN_HASH6(sa, sp, dp, rand) \
-	(((sa)->s6_addr32[0] ^ (rand)[0]) * \
-	((sa)->s6_addr32[1] ^ (rand)[1]) * \
-	((sa)->s6_addr32[2] ^ (rand)[2]) * \
-	((sa)->s6_addr32[3] ^ (rand)[3]) * \
-	(((((u_int32_t)(dp))<<16) + ((u_int32_t)(sp))) ^ (rand)[4]))
+struct pool syn_cache_pool;
+struct syn_cache_tree syn_cache_tree;
+struct syn_cache_list syn_cache_list;
+unsigned int syn_cache_count;
 
-#define SYN_HASHALL(hash, src, dst, rand) \
-do { \
-	switch ((src)->sa_family) { \
-	case AF_INET: \
-		hash = SYN_HASH(&satosin(src)->sin_addr, \
-			satosin(src)->sin_port, \
-			satosin(dst)->sin_port, (rand)); \
-		break; \
-	case AF_INET6: \
-		hash = SYN_HASH6(&satosin6(src)->sin6_addr, \
-			satosin6(src)->sin6_port, \
-			satosin6(dst)->sin6_port, (rand)); \
-		break; \
-	default: \
-		hash = 0; \
-	} \
-} while (/*CONSTCOND*/0)
-#endif /* INET6 */
+RB_GENERATE(syn_cache_tree, syn_cache, sc_tentry, syn_cache_cmp);
 
 void
 syn_cache_rm(struct syn_cache *sc)
 {
 	sc->sc_flags |= SCF_DEAD;
-	TAILQ_REMOVE(&sc->sc_buckethead->sch_bucket, sc, sc_bucketq);
+
+	RB_REMOVE(syn_cache_tree, &syn_cache_tree, sc);
+	TAILQ_REMOVE(&syn_cache_list, sc, sc_lentry);
+	syn_cache_count--;
+
 	sc->sc_tp = NULL;
 	LIST_REMOVE(sc, sc_tpq);
-	sc->sc_buckethead->sch_length--;
+
 	timeout_del(&sc->sc_timer);
-	sc->sc_set->scs_count--;
 }
 
 void
@@ -3336,8 +3331,6 @@ syn_cache_put(struct syn_cache *sc)
 	timeout_add(&sc->sc_timer, 0);
 }
 
-struct pool syn_cache_pool;
-
 /*
  * We don't estimate RTT with SYNs, so each packet starts with the default
  * RTT and each timer step has a fixed timeout value.
@@ -3347,8 +3340,6 @@ do { \
 	TCPT_RANGESET((sc)->sc_rxtcur, \
 	    TCPTV_SRTTDFLT * tcp_backoff[(sc)->sc_rxtshift], TCPTV_MIN, \
 	    TCPTV_REXMTMAX); \
-	if (!timeout_initialized(&(sc)->sc_timer)) \
-		timeout_set(&(sc)->sc_timer, syn_cache_timer, (sc)); \
 	timeout_add(&(sc)->sc_timer, (sc)->sc_rxtcur * (hz / PR_SLOWHZ)); \
 } while (/*CONSTCOND*/0)
 
@@ -3357,159 +3348,56 @@ do { \
 void
 syn_cache_init(void)
 {
-	int i;
-
-	/* Initialize the hash buckets. */
-	tcp_syn_cache[0].scs_buckethead = mallocarray(tcp_syn_hash_size,
-	    sizeof(struct syn_cache_head), M_SYNCACHE, M_WAITOK|M_ZERO);
-	tcp_syn_cache[1].scs_buckethead = mallocarray(tcp_syn_hash_size,
-	    sizeof(struct syn_cache_head), M_SYNCACHE, M_WAITOK|M_ZERO);
-	tcp_syn_cache[0].scs_size = tcp_syn_hash_size;
-	tcp_syn_cache[1].scs_size = tcp_syn_hash_size;
-	for (i = 0; i < tcp_syn_hash_size; i++) {
-		TAILQ_INIT(&tcp_syn_cache[0].scs_buckethead[i].sch_bucket);
-		TAILQ_INIT(&tcp_syn_cache[1].scs_buckethead[i].sch_bucket);
-	}
-
 	/* Initialize the syn cache pool. */
 	pool_init(&syn_cache_pool, sizeof(struct syn_cache), 0, 0, 0,
 	    "syncache", NULL);
 	pool_setipl(&syn_cache_pool, IPL_SOFTNET);
+
+	/* Initialize the syn cache storage. */
+	RB_INIT(&syn_cache_tree);
+	TAILQ_INIT(&syn_cache_list);
+	syn_cache_count = 0;
 }
 
-void
+struct syn_cache *
 syn_cache_insert(struct syn_cache *sc, struct tcpcb *tp)
 {
-	struct syn_cache_set *set = &tcp_syn_cache[tcp_syn_cache_active];
-	struct syn_cache_head *scp;
-	struct syn_cache *sc2;
-	int i, s;
-
-	s = splsoftnet();
-
-	/*
-	 * If there are no entries in the hash table, reinitialize
-	 * the hash secrets.  To avoid useless cache swaps and
-	 * reinitialization, use it until the limit is reached.
-	 * An emtpy cache is also the oportunity to resize the hash.
-	 */
-	if (set->scs_count == 0 && set->scs_use <= 0) {
-		set->scs_use = tcp_syn_use_limit;
-		if (set->scs_size != tcp_syn_hash_size) {
-			scp = mallocarray(tcp_syn_hash_size, sizeof(struct
-			    syn_cache_head), M_SYNCACHE, M_NOWAIT|M_ZERO);
-			if (scp == NULL) {
-				/* Try again next time. */
-				set->scs_use = 0;
-			} else {
-				free(set->scs_buckethead, M_SYNCACHE,
-				    set->scs_size *
-				    sizeof(struct syn_cache_head));
-				set->scs_buckethead = scp;
-				set->scs_size = tcp_syn_hash_size;
-				for (i = 0; i < tcp_syn_hash_size; i++)
-					TAILQ_INIT(&scp[i].sch_bucket);
-			}
-		}
-		arc4random_buf(set->scs_random, sizeof(set->scs_random));
-		tcpstat.tcps_sc_seedrandom++;
-	}
-
-	SYN_HASHALL(sc->sc_hash, &sc->sc_src.sa, &sc->sc_dst.sa,
-	    set->scs_random);
-	scp = &set->scs_buckethead[sc->sc_hash % set->scs_size];
-	sc->sc_buckethead = scp;
-
-	/*
-	 * Make sure that we don't overflow the per-bucket
-	 * limit or the total cache size limit.
-	 */
-	if (scp->sch_length >= tcp_syn_bucket_limit) {
-		tcpstat.tcps_sc_bucketoverflow++;
-		/*
-		 * Someone might attack our bucket hash function.  Reseed
-		 * with random as soon as the passive syn cache gets empty.
-		 */
-		set->scs_use = 0;
-		/*
-		 * The bucket is full.  Toss the oldest element in the
-		 * bucket.  This will be the first entry in the bucket.
-		 */
-		sc2 = TAILQ_FIRST(&scp->sch_bucket);
-#ifdef DIAGNOSTIC
-		/*
-		 * This should never happen; we should always find an
-		 * entry in our bucket.
-		 */
-		if (sc2 == NULL)
-			panic("syn_cache_insert: bucketoverflow: impossible");
-#endif
-		syn_cache_rm(sc2);
-		syn_cache_put(sc2);
-	} else if (set->scs_count >= tcp_syn_cache_limit) {
-		struct syn_cache_head *scp2, *sce;
-
-		tcpstat.tcps_sc_overflowed++;
-		/*
-		 * The cache is full.  Toss the oldest entry in the
-		 * first non-empty bucket we can find.
-		 *
-		 * XXX We would really like to toss the oldest
-		 * entry in the cache, but we hope that this
-		 * condition doesn't happen very often.
-		 */
-		scp2 = scp;
-		if (TAILQ_EMPTY(&scp2->sch_bucket)) {
-			sce = &set->scs_buckethead[set->scs_size];
-			for (++scp2; scp2 != scp; scp2++) {
-				if (scp2 >= sce)
-					scp2 = &set->scs_buckethead[0];
-				if (! TAILQ_EMPTY(&scp2->sch_bucket))
-					break;
-			}
-#ifdef DIAGNOSTIC
-			/*
-			 * This should never happen; we should always find a
-			 * non-empty bucket.
-			 */
-			if (scp2 == scp)
-				panic("syn_cache_insert: cacheoverflow: "
-				    "impossible");
-#endif
-		}
-		sc2 = TAILQ_FIRST(&scp2->sch_bucket);
-		syn_cache_rm(sc2);
-		syn_cache_put(sc2);
-	}
+	struct syn_cache *oldsc;
+	int s;
 
 	/*
 	 * Initialize the entry's timer.
 	 */
 	sc->sc_rxttot = 0;
 	sc->sc_rxtshift = 0;
-	SYN_CACHE_TIMER_ARM(sc);
 
-	/* Link it from tcpcb entry */
-	LIST_INSERT_HEAD(&tp->t_sc, sc, sc_tpq);
+	s = splsoftnet();
 
-	/* Put it into the bucket. */
-	TAILQ_INSERT_TAIL(&scp->sch_bucket, sc, sc_bucketq);
-	scp->sch_length++;
-	sc->sc_set = set;
-	set->scs_count++;
-	set->scs_use--;
+	/* Put it into the tree. */
+	oldsc = RB_INSERT(syn_cache_tree, &syn_cache_tree, sc);
+	if (oldsc == NULL) {
+		TAILQ_INSERT_TAIL(&syn_cache_list, sc, sc_lentry);
+
+		if (++syn_cache_count > tcp_syn_cache_limit) {
+			oldsc = TAILQ_FIRST(&syn_cache_list);
+			KASSERT(oldsc != NULL);
+			syn_cache_rm(oldsc);
+			syn_cache_put(oldsc);
 
-	tcpstat.tcps_sc_added++;
+			tcpstat.tcps_sc_overflowed++;
 
-	/*
-	 * If the active cache has exceeded its use limit and
-	 * the passive syn cache is empty, exchange their roles.
-	 */
-	if (set->scs_use <= 0 &&
-	    tcp_syn_cache[!tcp_syn_cache_active].scs_count == 0)
-		tcp_syn_cache_active = !tcp_syn_cache_active;
+			oldsc = NULL;
+		}
+		tcpstat.tcps_sc_added++;
+
+		/* Link it from tcpcb entry */
+		LIST_INSERT_HEAD(&tp->t_sc, sc, sc_tpq);
+		SYN_CACHE_TIMER_ARM(sc);
+	}
 
 	splx(s);
+
+	return (oldsc);
 }
 
 /*
@@ -3556,8 +3444,8 @@ syn_cache_timer(void *arg)
  dropit:
 	tcpstat.tcps_sc_timed_out++;
 	syn_cache_rm(sc);
-	syn_cache_put(sc);
 	splx(s);
+	syn_cache_put(sc);
 }
 
 void
@@ -3565,8 +3453,7 @@ syn_cache_reaper(void *arg)
 {
 	struct syn_cache *sc = arg;
 
-	pool_put(&syn_cache_pool, (sc));
-	return;
+	pool_put(&syn_cache_pool, sc);
 }
 
 /*
@@ -3600,36 +3487,18 @@ syn_cache_cleanup(struct tcpcb *tp)
  * Find an entry in the syn cache.
  */
 struct syn_cache *
-syn_cache_lookup(struct sockaddr *src, struct sockaddr *dst,
-    struct syn_cache_head **headp, u_int rtableid)
+syn_cache_lookup(struct sockaddr *src, struct sockaddr *dst, u_int rtableid)
 {
-	struct syn_cache_set *sets[2];
-	struct syn_cache *sc;
-	struct syn_cache_head *scp;
-	u_int32_t hash;
-	int i;
-
-	splsoftassert(IPL_SOFTNET);
-
-	/* Check the active cache first, the passive cache is likely emtpy. */
-	sets[0] = &tcp_syn_cache[tcp_syn_cache_active];
-	sets[1] = &tcp_syn_cache[!tcp_syn_cache_active];
-	for (i = 0; i < 2; i++) {
-		if (sets[i]->scs_count == 0)
-			continue;
-		SYN_HASHALL(hash, src, dst, sets[i]->scs_random);
-		scp = &sets[i]->scs_buckethead[hash % sets[i]->scs_size];
-		*headp = scp;
-		TAILQ_FOREACH(sc, &scp->sch_bucket, sc_bucketq) {
-			if (sc->sc_hash != hash)
-				continue;
-			if (!bcmp(&sc->sc_src, src, src->sa_len) &&
-			    !bcmp(&sc->sc_dst, dst, dst->sa_len) &&
-			    rtable_l2(rtableid) == rtable_l2(sc->sc_rtableid))
-				return (sc);
-		}
-	}
-	return (NULL);
+	struct syn_cache_key sck;
+
+	splassert(IPL_SOFTNET);
+
+	memcpy(&sck.sck_src.sa, src, src->sa_len);
+	memcpy(&sck.sck_dst.sa, dst, dst->sa_len);
+	sck.sck_rtableid = rtableid;
+
+	return (RB_FIND(syn_cache_tree, &syn_cache_tree,
+	    (struct syn_cache *)&sck));
 }
 
 /*
@@ -3660,7 +3529,6 @@ syn_cache_get(struct sockaddr *src, stru
     u_int hlen, u_int tlen, struct socket *so, struct mbuf *m)
 {
 	struct syn_cache *sc;
-	struct syn_cache_head *scp;
 	struct inpcb *inp, *oldinp;
 	struct tcpcb *tp = NULL;
 	struct mbuf *am;
@@ -3671,8 +3539,8 @@ syn_cache_get(struct sockaddr *src, stru
 #endif
 
 	s = splsoftnet();
-	if ((sc = syn_cache_lookup(src, dst, &scp,
-	    sotoinpcb(so)->inp_rtableid)) == NULL) {
+	sc = syn_cache_lookup(src, dst, sotoinpcb(so)->inp_rtableid);
+	if (sc == NULL) {
 		splx(s);
 		return (NULL);
 	}
@@ -3899,10 +3767,9 @@ syn_cache_reset(struct sockaddr *src, st
     u_int rtableid)
 {
 	struct syn_cache *sc;
-	struct syn_cache_head *scp;
 	int s = splsoftnet();
 
-	if ((sc = syn_cache_lookup(src, dst, &scp, rtableid)) == NULL) {
+	if ((sc = syn_cache_lookup(src, dst, rtableid)) == NULL) {
 		splx(s);
 		return;
 	}
@@ -3922,11 +3789,10 @@ syn_cache_unreach(struct sockaddr *src,
     u_int rtableid)
 {
 	struct syn_cache *sc;
-	struct syn_cache_head *scp;
 	int s;
 
 	s = splsoftnet();
-	if ((sc = syn_cache_lookup(src, dst, &scp, rtableid)) == NULL) {
+	if ((sc = syn_cache_lookup(src, dst, rtableid)) == NULL) {
 		splx(s);
 		return;
 	}
@@ -3978,7 +3844,6 @@ syn_cache_add(struct sockaddr *src, stru
 	struct tcpcb tb, *tp;
 	long win;
 	struct syn_cache *sc;
-	struct syn_cache_head *scp;
 	struct mbuf *ipopts;
 
 	tp = sototcpcb(so);
@@ -4033,8 +3898,8 @@ syn_cache_add(struct sockaddr *src, stru
 	 * If we do, resend the SYN,ACK.  We do not count this
 	 * as a retransmission (XXX though maybe we should).
	 */
-	if ((sc = syn_cache_lookup(src, dst, &scp, sotoinpcb(so)->inp_rtableid))
-	    != NULL) {
+	sc = syn_cache_lookup(src, dst, sotoinpcb(so)->inp_rtableid);
+	if (sc != NULL) {
 		tcpstat.tcps_sc_dupesyn++;
 		if (ipopts) {
 			/*
@@ -4060,6 +3925,8 @@ syn_cache_add(struct sockaddr *src, stru
 		return (-1);
 	}
 
+	timeout_set(&sc->sc_timer, syn_cache_timer, sc);
+
 	/*
 	 * Fill in the cache, and put the necessary IP and TCP
 	 * options into the reply.
@@ -4134,7 +4001,9 @@ syn_cache_add(struct sockaddr *src, stru
 #endif
 	sc->sc_tp = tp;
 	if (syn_cache_respond(sc, m) == 0) {
-		syn_cache_insert(sc, tp);
+		if (syn_cache_insert(sc, tp) != NULL)
+			syn_cache_put(sc);
+
 		tcpstat.tcps_sndacks++;
 		tcpstat.tcps_sndtotal++;
 	} else {
Index: tcp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.134
diff -u -p -r1.134 tcp_usrreq.c
--- tcp_usrreq.c	20 Jul 2016 19:57:53 -0000	1.134
+++ tcp_usrreq.c	26 Jul 2016 07:09:15 -0000
@@ -937,63 +937,16 @@ tcp_sysctl(name, namelen, oldp, oldlenp,
 		if (newp != NULL)
 			return (EPERM);
 		{
-			struct syn_cache_set *set;
-			int i;
-
-			set = &tcp_syn_cache[tcp_syn_cache_active];
-			tcpstat.tcps_sc_hash_size = set->scs_size;
-			tcpstat.tcps_sc_entry_count = set->scs_count;
 			tcpstat.tcps_sc_entry_limit = tcp_syn_cache_limit;
-			tcpstat.tcps_sc_bucket_maxlen = 0;
-			for (i = 0; i < set->scs_size; i++) {
-				if (tcpstat.tcps_sc_bucket_maxlen <
-				    set->scs_buckethead[i].sch_length)
-					tcpstat.tcps_sc_bucket_maxlen =
-					    set->scs_buckethead[i].sch_length;
-			}
-			tcpstat.tcps_sc_bucket_limit = tcp_syn_bucket_limit;
-			tcpstat.tcps_sc_uses_left = set->scs_use;
 		}
 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
 		    &tcpstat, sizeof(tcpstat)));
 
 	case TCPCTL_SYN_USE_LIMIT:
-		error = sysctl_int(oldp, oldlenp, newp, newlen,
-		    &tcp_syn_use_limit);
-		if (error)
-			return (error);
-		if (newp != NULL) {
-			/*
-			 * Global tcp_syn_use_limit is used when reseeding a
-			 * new cache.  Also update the value in active cache.
-			 */
-			if (tcp_syn_cache[0].scs_use > tcp_syn_use_limit)
-				tcp_syn_cache[0].scs_use = tcp_syn_use_limit;
-			if (tcp_syn_cache[1].scs_use > tcp_syn_use_limit)
-				tcp_syn_cache[1].scs_use = tcp_syn_use_limit;
-		}
-		return (0);
+		return (ENOTDIR);
 
 	case TCPCTL_SYN_HASH_SIZE:
-		nval = tcp_syn_hash_size;
-		error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
-		if (error)
-			return (error);
-		if (nval != tcp_syn_hash_size) {
-			if (nval < 1 || nval > 100000)
-				return (EINVAL);
-			/*
-			 * If global hash size has been changed, switch sets as
-			 * soon as possible.  Then the actual hash array will
-			 * be reallocated.
-			 */
-			if (tcp_syn_cache[0].scs_size != nval)
-				tcp_syn_cache[0].scs_use = 0;
-			if (tcp_syn_cache[1].scs_size != nval)
-				tcp_syn_cache[1].scs_use = 0;
-			tcp_syn_hash_size = nval;
-		}
-		return (0);
+		return (ENOTDIR);
 
 	default:
 		if (name[0] < TCPCTL_MAXID)
Index: tcp_var.h
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.115
diff -u -p -r1.115 tcp_var.h
--- tcp_var.h	20 Jul 2016 19:57:53 -0000	1.115
+++ tcp_var.h	26 Jul 2016 07:09:15 -0000
@@ -261,8 +261,20 @@ union syn_cache_sa {
 	struct sockaddr_in6 sin6;
 };
 
+struct syn_cache_key {
+	union syn_cache_sa sck_src;
+	union syn_cache_sa sck_dst;
+	u_int sck_rtableid;
+};
+
 struct syn_cache {
-	TAILQ_ENTRY(syn_cache) sc_bucketq;	/* link on bucket list */
+	struct syn_cache_key sc_key;
+#define sc_rtableid	sc_key.sck_rtableid
+#define sc_src		sc_key.sck_src
+#define sc_dst		sc_key.sck_dst
+
+	RB_ENTRY(syn_cache) sc_tentry;
+	TAILQ_ENTRY(syn_cache) sc_lentry;
 	struct timeout sc_timer;		/* rexmt timer */
 	union {					/* cached route */
 		struct route route4;
@@ -275,19 +287,13 @@ struct syn_cache {
 #define sc_route6 sc_route_u.route6
 #endif
 	long sc_win;				/* advertised window */
-	struct syn_cache_head *sc_buckethead;	/* our bucket index */
-	struct syn_cache_set *sc_set;		/* our syn cache set */
-	u_int32_t sc_hash;
 	u_int32_t sc_timestamp;			/* timestamp from SYN */
 	u_int32_t sc_modulate;			/* our timestamp modulator */
 #if 0
 	u_int32_t sc_timebase;			/* our local timebase */
 #endif
-	union syn_cache_sa sc_src;
-	union syn_cache_sa sc_dst;
 	tcp_seq sc_irs;
 	tcp_seq sc_iss;
-	u_int sc_rtableid;
 	u_int sc_rxtcur;			/* current rxt timeout */
 	u_int sc_rxttot;			/* total time spend on queues */
 	u_short sc_rxtshift;			/* for computing backoff */
@@ -310,19 +316,6 @@ struct syn_cache {
 	LIST_ENTRY(syn_cache) sc_tpq;		/* list of entries by same tp */
 };
 
-struct syn_cache_head {
-	TAILQ_HEAD(, syn_cache) sch_bucket;	/* bucket entries */
-	u_short sch_length;			/* # entries in bucket */
-};
-
-struct syn_cache_set {
-	struct syn_cache_head *scs_buckethead;
-	int scs_size;
-	int scs_count;
-	int scs_use;
-	u_int32_t scs_random[5];
-};
-
 #endif /* _KERNEL */
 
 /*
@@ -547,7 +540,7 @@ struct tcpstat {
 	&tcp_ack_on_push, \
 	NULL, \
 	&tcp_syn_cache_limit, \
-	&tcp_syn_bucket_limit, \
+	NULL, \
 	&tcp_do_rfc3390, \
 	NULL, \
 	NULL, \
@@ -585,12 +578,7 @@ extern int tcp_do_rfc3390; /* RFC3390 In
 extern struct pool tcpqe_pool;
 extern int tcp_reass_limit;	/* max entries for tcp reass queues */
 
-extern int tcp_syn_hash_size;	/* adjustable size of the hash array */
 extern int tcp_syn_cache_limit;	/* max entries for compressed state engine */
-extern int tcp_syn_bucket_limit;/* max entries per hash bucket */
-extern int tcp_syn_use_limit;	/* number of uses before reseeding hash */
-extern struct syn_cache_set tcp_syn_cache[];
-extern int tcp_syn_cache_active; /* active syn cache, may be 0 or 1 */
 
 int	 tcp_attach(struct socket *);
 void	 tcp_canceltimers(struct tcpcb *);
@@ -679,9 +667,6 @@ struct socket *syn_cache_get(struct sock
 	     struct tcphdr *, unsigned int, unsigned int, struct socket *so,
 	     struct mbuf *);
 void	 syn_cache_init(void);
-void	 syn_cache_insert(struct syn_cache *, struct tcpcb *);
-struct syn_cache *syn_cache_lookup(struct sockaddr *, struct sockaddr *,
-	     struct syn_cache_head **, u_int);
 void	 syn_cache_reset(struct sockaddr *, struct sockaddr *,
 	     struct tcphdr *, u_int);
 int	 syn_cache_respond(struct syn_cache *, struct mbuf *);
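For readers less familiar with <sys/tree.h>, here is a small illustration of the lookup scheme the diff introduces: entries are keyed by (rtableid, source, destination) and found with RB_FIND() on a key-only object built on the stack, which is what the new syn_cache_lookup() does with struct syn_cache_key.  The standalone userland sketch below is NOT part of the diff; it uses made-up demo_* names and plain sockaddr_in keys instead of union syn_cache_sa, and assumes a BSD system where <sys/tree.h> is installed for userland.

/*
 * Sketch only: keyed RB-tree insert and lookup in the style of the patch.
 * All demo_* identifiers are invented for this example.
 */
#include <sys/tree.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>

struct demo_key {
	struct sockaddr_in	dk_src;
	struct sockaddr_in	dk_dst;
	unsigned int		dk_rtableid;
};

struct demo_entry {
	struct demo_key		de_key;		/* the patch keeps the key first */
	RB_ENTRY(demo_entry)	de_tentry;
};

RB_HEAD(demo_tree, demo_entry);

static inline int
demo_cmp(struct demo_entry *a, struct demo_entry *b)
{
	int rv;

	/* Same ordering idea as syn_cache_cmp(): rtableid, then src, then dst. */
	if (a->de_key.dk_rtableid > b->de_key.dk_rtableid)
		return (-1);
	if (a->de_key.dk_rtableid < b->de_key.dk_rtableid)
		return (1);
	rv = memcmp(&a->de_key.dk_src, &b->de_key.dk_src,
	    sizeof(a->de_key.dk_src));
	if (rv != 0)
		return (rv);
	return (memcmp(&a->de_key.dk_dst, &b->de_key.dk_dst,
	    sizeof(a->de_key.dk_dst)));
}

RB_PROTOTYPE(demo_tree, demo_entry, de_tentry, demo_cmp)
RB_GENERATE(demo_tree, demo_entry, de_tentry, demo_cmp)

int
main(void)
{
	struct demo_tree tree = RB_INITIALIZER(&tree);
	struct demo_entry e, k, *found;

	/* Insert one entry keyed by a fictitious connection. */
	memset(&e, 0, sizeof(e));
	e.de_key.dk_src.sin_family = AF_INET;
	e.de_key.dk_src.sin_port = htons(12345);
	e.de_key.dk_dst.sin_family = AF_INET;
	e.de_key.dk_dst.sin_port = htons(80);
	e.de_key.dk_rtableid = 0;
	RB_INSERT(demo_tree, &tree, &e);

	/* Look it up with a key-only object on the stack. */
	memset(&k, 0, sizeof(k));
	k.de_key = e.de_key;
	found = RB_FIND(demo_tree, &tree, &k);
	printf("lookup: %s\n", found == &e ? "hit" : "miss");

	return (0);
}

The cast in the new syn_cache_lookup() works for the same reason the sketch does: RB_FIND() only passes the key object to the comparator, the comparator only reads sc_key fields, and sc_key is the first member of struct syn_cache, so a struct syn_cache_key filled in on the stack is sufficient.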