Index: conf/files
===================================================================
RCS file: /cvs/src/sys/conf/files,v
retrieving revision 1.591
diff -u -p -r1.591 files
--- conf/files	28 Dec 2014 21:32:44 -0000	1.591
+++ conf/files	21 Apr 2015 09:04:07 -0000
@@ -857,6 +857,7 @@ file crypto/sha1.c (inet & ipsec) | cr
 file	crypto/sha2.c
 file	crypto/blf.c			(inet & ipsec) | crypto | vnd
 file	crypto/cast.c			(inet & ipsec) | crypto
+file	crypto/chacha.c
 file	crypto/ecb_enc.c		(inet & ipsec) | crypto
 file	crypto/set_key.c		(inet & ipsec) | crypto
 file	crypto/ecb3_enc.c		(inet & ipsec) | crypto
Index: crypto/chacha.c
===================================================================
RCS file: crypto/chacha.c
diff -N crypto/chacha.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ crypto/chacha.c	21 Apr 2015 09:04:07 -0000
@@ -0,0 +1,274 @@
+/*	$OpenBSD$	*/
+
+/*
+chacha-merged.c version 20080118
+D. J. Bernstein
+Public domain.
+*/
+
+#include	/* NOTE(review): header name lost in extraction; restore before applying */
+#include	/* NOTE(review): header name lost in extraction; restore before applying */
+
+#include	/* NOTE(review): header name lost; presumably <crypto/chacha.h> -- confirm */
+
+/* Assemble a 32-bit value from four bytes stored least-significant first. */
+#define U8TO32_LITTLE(p) \
+	(((uint32_t)((p)[0])      ) | \
+	 ((uint32_t)((p)[1]) <<  8) | \
+	 ((uint32_t)((p)[2]) << 16) | \
+	 ((uint32_t)((p)[3]) << 24))
+
+/* Store a 32-bit value as four bytes, least-significant first. */
+#define U32TO8_LITTLE(p, v) do { \
+	(p)[0] = (v)      ; \
+	(p)[1] = (v) >>  8; \
+	(p)[2] = (v) >> 16; \
+	(p)[3] = (v) >> 24; \
+} while (0)
+
+/* Rotate the 32-bit value v left by n bits (0 < n < 32). */
+#define ROTATE(v, n) ((v) << (n) | ((v) >> (32 - (n))))
+
+/*
+ * Load the 16-byte constant c and the two 16-byte key halves k1 and k2
+ * into state words 0-11.  Every word is assembled byte by byte in
+ * little-endian order, so none of the inputs has to be 32-bit aligned.
+ */
+static void
+chacha_key_setup(chacha_ctx *ctx, const uint8_t *c,
+    const uint8_t *k1, const uint8_t *k2)
+{
+	ctx->input[0] = U8TO32_LITTLE(c);
+	ctx->input[1] = U8TO32_LITTLE(c + 4);
+	ctx->input[2] = U8TO32_LITTLE(c + 8);
+	ctx->input[3] = U8TO32_LITTLE(c + 12);
+
+	ctx->input[4] = U8TO32_LITTLE(k1);
+	ctx->input[5] = U8TO32_LITTLE(k1 + 4);
+	ctx->input[6] = U8TO32_LITTLE(k1 + 8);
+	ctx->input[7] = U8TO32_LITTLE(k1 + 12);
+
+	ctx->input[8] = U8TO32_LITTLE(k2);
+	ctx->input[9] = U8TO32_LITTLE(k2 + 4);
+	ctx->input[10] = U8TO32_LITTLE(k2 + 8);
+	ctx->input[11] = U8TO32_LITTLE(k2 + 12);
+}
+
+/*
+ * Install a 128-bit key: the same 16 key bytes fill both key halves of
+ * the state (words 4-7 and 8-11), after the "expand 16-byte k" constant.
+ */
+void
+chacha_128key_setup(chacha_ctx *ctx, const chacha_128key *key)
+{
+	static const char tau[16] = "expand 16-byte k";
+	const uint8_t *k = (const uint8_t *)key->k;
+
+	chacha_key_setup(ctx, (const uint8_t *)tau, k, k);
+}
+
+/*
+ * Install a 256-bit key: the first 16 key bytes fill state words 4-7, the
+ * last 16 fill words 8-11, after the "expand 32-byte k" constant.
+ */
+void
+chacha_256key_setup(chacha_ctx *ctx, const chacha_256key *key)
+{
+	static const char sigma[16] = "expand 32-byte k";
+	const uint8_t *k = (const uint8_t *)key->k;
+
+	chacha_key_setup(ctx, (const uint8_t *)sigma, k, k + 16);
+}
+
+/*
+ * Reset the block counter (words 12-13) to zero and load the 8-byte IV
+ * into words 14-15, reading it byte-wise in little-endian order.
+ */
+void
+chacha_iv_setup(chacha_ctx *ctx, const chacha_iv *iv)
+{
+	const uint8_t *p = (const uint8_t *)iv->iv;
+
+	ctx->input[12] = 0;
+	ctx->input[13] = 0;
+	ctx->input[14] = U8TO32_LITTLE(p);
+	ctx->input[15] = U8TO32_LITTLE(p + 4);
+}
+
+/* One quarter round: mixes the four state words a, b, c, d in place. */
+#define QUARTERROUND(a, b, c, d) do { \
+	a += b; d = ROTATE(d ^ a, 16); \
+	c += d; b = ROTATE(b ^ c, 12); \
+	a += b; d = ROTATE(d ^ a,  8); \
+	c += d; b = ROTATE(b ^ c,  7); \
+} while (0)
+
+/*
+ * Produce one keystream block: copy *ctx into *state, run ten iterations
+ * of eight quarter rounds on the copy, add the original input words back
+ * in, and advance the block counter held in ctx words 12-13.
+ */
+static inline void
+chacha_round(chacha_ctx *ctx, chacha_ctx *state)
+{
+	uint32_t *x = state->input;
+	u_int i;
+
+	*state = *ctx;
+
+	for (i = 20; i > 0; i -= 2) {
+		QUARTERROUND(x[0], x[4], x[8], x[12]);
+		QUARTERROUND(x[1], x[5], x[9], x[13]);
+		QUARTERROUND(x[2], x[6], x[10], x[14]);
+		QUARTERROUND(x[3], x[7], x[11], x[15]);
+		QUARTERROUND(x[0], x[5], x[10], x[15]);
+		QUARTERROUND(x[1], x[6], x[11], x[12]);
+		QUARTERROUND(x[2], x[7], x[8], x[13]);
+		QUARTERROUND(x[3], x[4], x[9], x[14]);
+	}
+
+	/* unrolled loop is fast */
+	x[0] += ctx->input[0];
+	x[1] += ctx->input[1];
+	x[2] += ctx->input[2];
+	x[3] += ctx->input[3];
+	x[4] += ctx->input[4];
+	x[5] += ctx->input[5];
+	x[6] += ctx->input[6];
+	x[7] += ctx->input[7];
+	x[8] += ctx->input[8];
+	x[9] += ctx->input[9];
+	x[10] += ctx->input[10];
+	x[11] += ctx->input[11];
+	x[12] += ctx->input[12];
+	x[13] += ctx->input[13];
+	x[14] += ctx->input[14];
+	x[15] += ctx->input[15];
+
+	/* words 12-13 form one counter: carry into 13 when 12 wraps */
+	if (++ctx->input[12] == 0)
+		++ctx->input[13];
+}
+
+/* Serialize the 16 state words into 64 bytes at dst, little-endian. */
+static inline void
+chacha_wr(const chacha_ctx *state, uint8_t *dst)
+{
+	U32TO8_LITTLE(dst, state->input[0]);
+	U32TO8_LITTLE(dst + 4, state->input[1]);
+	U32TO8_LITTLE(dst + 8, state->input[2]);
+	U32TO8_LITTLE(dst + 12, state->input[3]);
+	U32TO8_LITTLE(dst + 16, state->input[4]);
+	U32TO8_LITTLE(dst + 20, state->input[5]);
+	U32TO8_LITTLE(dst + 24, state->input[6]);
+	U32TO8_LITTLE(dst + 28, state->input[7]);
+	U32TO8_LITTLE(dst + 32, state->input[8]);
+	U32TO8_LITTLE(dst + 36, state->input[9]);
+	U32TO8_LITTLE(dst + 40, state->input[10]);
+	U32TO8_LITTLE(dst + 44, state->input[11]);
+	U32TO8_LITTLE(dst + 48, state->input[12]);
+	U32TO8_LITTLE(dst + 52, state->input[13]);
+	U32TO8_LITTLE(dst + 56, state->input[14]);
+	U32TO8_LITTLE(dst + 60, state->input[15]);
+}
+
+/*
+ * Write len bytes of keystream to d, advancing the block counter in ctx
+ * once per 64-byte block generated.  A trailing partial block consumes a
+ * whole block; its unused bytes are discarded.  The stack copies of the
+ * keystream are wiped before returning.
+ */
+void
+chacha_stream(chacha_ctx *ctx, void *d, size_t len)
+{
+	chacha_ctx state;
+	uint8_t *dst = d;
+
+	if (len == 0)
+		return;
+
+	while (len >= CHACHA_BLOCK_LEN) {
+		chacha_round(ctx, &state);
+
+		chacha_wr(&state, dst);
+
+		dst += CHACHA_BLOCK_LEN;
+		len -= CHACHA_BLOCK_LEN;
+	}
+
+	if (len > 0) {
+		uint8_t tmp[CHACHA_BLOCK_LEN];
+
+		chacha_stream(ctx, tmp, sizeof(tmp));
+		memcpy(dst, tmp, len);
+		/* do not leave the unused keystream behind on the stack */
+		explicit_bzero(tmp, sizeof(tmp));
+	}
+
+	explicit_bzero(&state, sizeof(state));
+}
+
+#ifdef notyet
+/* XOR 64 bytes at src with the 16 state words (little-endian) into dst. */
+static inline void
+chacha_xorwr(const chacha_ctx *state, uint8_t *dst, const uint8_t *src)
+{
+
+	/* unrolled loop is fast */
+	U32TO8_LITTLE(dst, state->input[0] ^ U8TO32_LITTLE(src));
+	U32TO8_LITTLE(dst + 4, state->input[1] ^ U8TO32_LITTLE(src + 4));
+	U32TO8_LITTLE(dst + 8, state->input[2] ^ U8TO32_LITTLE(src + 8));
+	U32TO8_LITTLE(dst + 12, state->input[3] ^ U8TO32_LITTLE(src + 12));
+	U32TO8_LITTLE(dst + 16, state->input[4] ^ U8TO32_LITTLE(src + 16));
+	U32TO8_LITTLE(dst + 20, state->input[5] ^ U8TO32_LITTLE(src + 20));
+	U32TO8_LITTLE(dst + 24, state->input[6] ^ U8TO32_LITTLE(src + 24));
+	U32TO8_LITTLE(dst + 28, state->input[7] ^ U8TO32_LITTLE(src + 28));
+	U32TO8_LITTLE(dst + 32, state->input[8] ^ U8TO32_LITTLE(src + 32));
+	U32TO8_LITTLE(dst + 36, state->input[9] ^ U8TO32_LITTLE(src + 36));
+	U32TO8_LITTLE(dst + 40, state->input[10] ^ U8TO32_LITTLE(src + 40));
+	U32TO8_LITTLE(dst + 44, state->input[11] ^ U8TO32_LITTLE(src + 44));
+	U32TO8_LITTLE(dst + 48, state->input[12] ^ U8TO32_LITTLE(src + 48));
+	U32TO8_LITTLE(dst + 52, state->input[13] ^ U8TO32_LITTLE(src + 52));
+	U32TO8_LITTLE(dst + 56, state->input[14] ^ U8TO32_LITTLE(src + 56));
+	U32TO8_LITTLE(dst + 60, state->input[15] ^ U8TO32_LITTLE(src + 60));
+}
+
+/*
+ * XOR len bytes from s with keystream and store the result in d.  A
+ * trailing partial block is zero-padded in a stack buffer, run through a
+ * full block, and only its first len bytes are copied out.
+ */
+void
+chacha_encrypt(chacha_ctx *ctx, void *d, const void *s, size_t len)
+{
+	chacha_ctx state;
+	const uint8_t *src = s;
+	uint8_t *dst = d;
+
+	if (len == 0)
+		return;
+
+	while (len >= CHACHA_BLOCK_LEN) {
+		chacha_round(ctx, &state);
+
+		chacha_xorwr(&state, dst, src);
+
+		dst += CHACHA_BLOCK_LEN;
+		src += CHACHA_BLOCK_LEN;
+		len -= CHACHA_BLOCK_LEN;
+	}
+
+	if (len > 0) {
+		uint8_t tmp[CHACHA_BLOCK_LEN];
+
+		memcpy(tmp, src, len);
+		/* the full-block XOR below reads all of tmp: define the pad */
+		memset(tmp + len, 0, sizeof(tmp) - len);
+		chacha_encrypt(ctx, tmp, tmp, sizeof(tmp));
+		memcpy(dst, tmp, len);
+		explicit_bzero(tmp, sizeof(tmp));
+	}
+
+	explicit_bzero(&state, sizeof(state));
+}
+#endif
Index: crypto/chacha.h
===================================================================
RCS file: crypto/chacha.h
diff -N crypto/chacha.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ crypto/chacha.h	21 Apr 2015 09:04:07 -0000
@@ -0,0 +1,47 @@
+/*	$OpenBSD$	*/
+
+/*
+chacha-merged.c version 20080118
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef _CHACHA_H_
+#define _CHACHA_H_
+
+/* Cipher state: words 0-3 constant, 4-11 key, 12-13 counter, 14-15 IV. */
+typedef struct {
+	uint32_t	input[16]; /* could be compressed */
+} chacha_ctx;
+
+/* 16 bytes of key material; the setup code reads it as little-endian. */
+typedef struct {
+	uint32_t	k[4];
+} chacha_128key;
+
+/* 32 bytes of key material; the setup code reads it as little-endian. */
+typedef struct {
+	uint32_t	k[8];
+} chacha_256key;
+
+/* 8-byte IV; the setup code reads it as little-endian. */
+typedef struct {
+	uint32_t	iv[2];
+} chacha_iv;
+
+#define CHACHA_128KEY_LEN	sizeof(chacha_128key)
+#define CHACHA_256KEY_LEN	sizeof(chacha_256key)
+#define CHACHA_IV_LEN		sizeof(chacha_iv)
+#define CHACHA_BLOCK_LEN	64
+
+void	chacha_128key_setup(chacha_ctx *, const chacha_128key *);
+void	chacha_256key_setup(chacha_ctx *, const chacha_256key *);
+void	chacha_iv_setup(chacha_ctx *, const chacha_iv *);
+void	chacha_stream(chacha_ctx *, void *, size_t)
+	    __bounded((__buffer__, 2, 3));
+#ifdef notyet
+void	chacha_encrypt(chacha_ctx *, void *, const void *, size_t)
+	    __bounded((__buffer__, 2, 4)) __bounded((__buffer__, 3, 4));
+#endif
+
+#endif /* _CHACHA_H_ */
Index: dev/rnd.c
===================================================================
RCS file: /cvs/src/sys/dev/rnd.c,v
retrieving revision 1.173
diff -u -p -r1.173 rnd.c
--- dev/rnd.c	14 Mar 2015 03:38:46 -0000	1.173
+++ dev/rnd.c	21 Apr 2015 09:04:07 -0000
@@ -128,8 +128,7 @@
 
 #include
 
-#define KEYSTREAM_ONLY
-#include
+#include	/* NOTE(review): header name lost; presumably <crypto/chacha.h> -- confirm */
 
 #include
 
@@ -214,11 +213,11 @@ struct timer_rand_state {	/* There is on
 #define QEVSLOW (QEVLEN * 3 / 4) /* yet another 0.75 for 60-minutes hour /-; */
 #define QEVSBITS 10
 
-#define KEYSZ	32
-#define IVSZ	8
-#define BLOCKSZ	64
+#define KEYSZ	CHACHA_256KEY_LEN
+#define IVSZ	CHACHA_IV_LEN
+#define BLOCKSZ	CHACHA_BLOCK_LEN
 #define RSBUFSZ	(16*BLOCKSZ)
-#define EBUFSIZE KEYSZ + IVSZ
+#define EBUFSIZE (KEYSZ + IVSZ)
 
 struct rand_event {
 	struct timer_rand_state *re_state;
@@ -488,10 +487,6 @@ extract_entropy(u_int8_t *buf)
 	u_char digest[SHA512_DIGEST_LENGTH];
 	SHA2_CTX shactx;
 
-#if SHA512_DIGEST_LENGTH < EBUFSIZE
-#error "need more bigger hash output"
-#endif
-
 	/*
 	 * INTENTIONALLY not protected by entropylock.  Races during
 	 * memcpy() result in acceptable input data; races during
@@ -568,8 +563,8 @@ static inline void
 _rs_init(u_char *buf, size_t n)
 {
 	KASSERT(n >= KEYSZ + IVSZ);
-	chacha_keysetup(&rs, buf, KEYSZ * 8, 0);
-	chacha_ivsetup(&rs, buf + KEYSZ);
+	chacha_256key_setup(&rs, (chacha_256key *)buf);
+	chacha_iv_setup(&rs, (chacha_iv *)(buf + KEYSZ));
 }
 
 static void
@@ -629,11 +624,8 @@ _rs_stir_if_needed(size_t len)
 static inline void
 _rs_rekey(u_char *dat, size_t datlen)
 {
-#ifndef KEYSTREAM_ONLY
-	memset(rs_buf, 0, RSBUFSZ);
-#endif
 	/* fill rs_buf with the keystream */
-	chacha_encrypt_bytes(&rs, rs_buf, rs_buf, RSBUFSZ);
+	chacha_stream(&rs, rs_buf, RSBUFSZ);
 	/* mix in optional user provided data */
 	if (dat) {
 		size_t i, m;
@@ -821,7 +813,10 @@ randomclose(dev_t dev, int flag, int mod
 int
 randomread(dev_t dev, struct uio *uio, int ioflag)
 {
-	u_char		lbuf[KEYSZ+IVSZ];
+	struct {
+		chacha_256key	key;
+		chacha_iv	iv;
+	} lbuf;
 	chacha_ctx	lctx;
 	size_t		total = uio->uio_resid;
 	u_char		*buf;
@@ -832,22 +827,19 @@ randomread(dev_t dev, struct uio *uio, i
 
 	buf = malloc(POOLBYTES, M_TEMP, M_WAITOK);
 	if (total > ARC4_MAIN_MAX_BYTES) {
-		arc4random_buf(lbuf, sizeof(lbuf));
-		chacha_keysetup(&lctx, lbuf, KEYSZ * 8, 0);
-		chacha_ivsetup(&lctx, lbuf + KEYSZ);
-		explicit_bzero(lbuf, sizeof(lbuf));
+		arc4random_buf(&lbuf, sizeof(lbuf));
+		chacha_256key_setup(&lctx, &lbuf.key);
+		chacha_iv_setup(&lctx, &lbuf.iv);
+		explicit_bzero(&lbuf, sizeof(lbuf));
 		myctx = 1;
 	}
 
 	while (ret == 0 && uio->uio_resid > 0) {
 		int	n = min(POOLBYTES, uio->uio_resid);
 
-		if (myctx) {
-#ifndef KEYSTREAM_ONLY
-			memset(buf, 0, n);
-#endif
-			chacha_encrypt_bytes(&lctx, buf, buf, n);
-		} else
+		if (myctx)
+			chacha_stream(&lctx, buf, n);
+		else
 			arc4random_buf(buf, n);
 		ret = uiomovei(buf, n, uio);
 		if (ret == 0 && uio->uio_resid > 0)