diff --git a/Makefile.in b/Makefile.in index e3cd296c..9ae28bac 100644 --- a/Makefile.in +++ b/Makefile.in @@ -46,7 +46,7 @@ CFLAGS=@CFLAGS@ CFLAGS_NOPIE=@CFLAGS_NOPIE@ CPPFLAGS=-I. -I$(srcdir) @CPPFLAGS@ $(PATHS) @DEFS@ PICFLAG=@PICFLAG@ -LIBS=@LIBS@ +LIBS=@LIBS@ -lpthread K5LIBS=@K5LIBS@ GSSLIBS=@GSSLIBS@ SSHDLIBS=@SSHDLIBS@ @@ -92,7 +92,7 @@ LIBOPENSSH_OBJS=\ LIBSSH_OBJS=${LIBOPENSSH_OBJS} \ authfd.o authfile.o \ canohost.o channels.o cipher.o cipher-aes.o cipher-aesctr.o \ - cipher-ctr.o cleanup.o \ + cipher-ctr.o cleanup.o cipher-ctr-mt.o \ compat.o fatal.o hostfile.o \ log.o match.o moduli.o nchan.o packet.o \ readpass.o ttymodes.o xmalloc.o addr.o addrmatch.o \ diff --git a/cipher-ctr-mt.c b/cipher-ctr-mt.c new file mode 100644 index 00000000..71172f1b --- /dev/null +++ b/cipher-ctr-mt.c @@ -0,0 +1,679 @@ +/* + * OpenSSH Multi-threaded AES-CTR Cipher + * + * Author: Benjamin Bennett + * Author: Mike Tasota + * Author: Chris Rapier + * Copyright (c) 2008-2013 Pittsburgh Supercomputing Center. All rights reserved. + * + * Based on original OpenSSH AES-CTR cipher. Small portions remain unchanged, + * Copyright (c) 2003 Markus Friedl + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "includes.h" + +#if defined(WITH_OPENSSL) +#include <sys/types.h> + +#include <stdarg.h> +#include <string.h> + +#include <openssl/evp.h> + +#include "xmalloc.h" +#include "log.h" +#include <unistd.h> + +/* compatibility with old or broken OpenSSL versions */ +#include "openbsd-compat/openssl-compat.h" + +#ifndef USE_BUILTIN_RIJNDAEL +#include <openssl/aes.h> +#endif + +#include <pthread.h> + +/*-------------------- TUNABLES --------------------*/ +/* maximum number of threads and queues */ +#define MAX_THREADS 32 +#define MAX_NUMKQ (MAX_THREADS * 2) + +/* Number of pregen threads to use */ +int cipher_threads = 2; + +/* Number of keystream queues */ +int numkq = 4; + +/* Length of a keystream queue */ +#define KQLEN 4096 + +/* Processor cacheline length */ +#define CACHELINE_LEN 64 + +/* Collect thread stats and print at cancellation when in debug mode */ +#define CIPHER_THREAD_STATS + +/* Can the system do unaligned loads natively? */ +#if defined(__aarch64__) || \ + defined(__i386__) || \ + defined(__powerpc__) || \ + defined(__x86_64__) +# define CIPHER_UNALIGNED_OK +#endif +#if defined(__SIZEOF_INT128__) +# define CIPHER_INT128_OK +#endif +/*-------------------- END TUNABLES --------------------*/ + + +const EVP_CIPHER *evp_aes_ctr_mt(void); + +#ifdef CIPHER_THREAD_STATS +/* + * Struct to collect thread stats + */ +struct thread_stats { + u_int fills; + u_int skips; + u_int waits; + u_int drains; +}; + +/* + * Debug print the thread stats + * Use with pthread_cleanup_push for displaying at thread cancellation + */ +static void +thread_loop_stats(void *x) +{ + struct thread_stats *s = x; + + debug("AES-CTR MT tid %lu - %u fills, %u skips, %u waits", pthread_self(), + s->fills, s->skips, s->waits); +} + +# define STATS_STRUCT(s) struct thread_stats s +# define STATS_INIT(s) { memset(&s, 0, sizeof(s)); } +# define STATS_FILL(s) { s.fills++; } +# define STATS_SKIP(s) { s.skips++; } +# define STATS_WAIT(s) { s.waits++; } +# define STATS_DRAIN(s) { s.drains++; } +#else +# define STATS_STRUCT(s) +# define STATS_INIT(s) +# define 
STATS_FILL(s) +# define STATS_SKIP(s) +# define STATS_WAIT(s) +# define STATS_DRAIN(s) +#endif + +/* Keystream Queue state */ +enum { + KQINIT, + KQEMPTY, + KQFILLING, + KQFULL, + KQDRAINING +}; + +/* Keystream Queue struct */ +struct kq { + u_char keys[KQLEN][AES_BLOCK_SIZE]; + u_char ctr[AES_BLOCK_SIZE]; + u_char pad0[CACHELINE_LEN]; + int qstate; + pthread_mutex_t lock; + pthread_cond_t cond; + u_char pad1[CACHELINE_LEN]; +}; + +/* Context struct */ +struct ssh_aes_ctr_ctx_mt +{ + int struct_id; + struct kq q[MAX_NUMKQ]; + AES_KEY aes_ctx; + STATS_STRUCT(stats); + u_char aes_counter[AES_BLOCK_SIZE]; + pthread_t tid[MAX_THREADS]; + int id[MAX_THREADS]; + pthread_rwlock_t tid_lock; +#ifdef __APPLE__ + pthread_rwlock_t stop_lock; + int exit_flag; +#endif /* __APPLE__ */ + int state; + int qidx; + int ridx; +}; + +/* + * increment counter 'ctr', + * the counter is of size 'len' bytes and stored in network-byte-order. + * (LSB at ctr[len-1], MSB at ctr[0]) + */ +static void +ssh_ctr_inc(u_char *ctr, size_t len) +{ + int i; + + for (i = len - 1; i >= 0; i--) + if (++ctr[i]) /* continue on overflow */ + return; +} + +/* + * Add num to counter 'ctr' + */ +static void +ssh_ctr_add(u_char *ctr, uint32_t num, u_int len) +{ + int i; + uint16_t n; + + for (n = 0, i = len - 1; i >= 0 && (num || n); i--) { + n = ctr[i] + (num & 0xff) + n; + num >>= 8; + ctr[i] = n & 0xff; + n >>= 8; + } +} + +/* + * Threads may be cancelled in a pthread_cond_wait, we must free the mutex + */ +static void +thread_loop_cleanup(void *x) +{ + pthread_mutex_unlock((pthread_mutex_t *)x); +} + +#ifdef __APPLE__ +/* Check if we should exit, we are doing both cancel and exit condition + * since on OSX threads seem to occasionally fail to notice when they have + * been cancelled. We want to have a backup to make sure that we won't hang + * when the main process join()-s the cancelled thread. 
+ */ +static void +thread_loop_check_exit(struct ssh_aes_ctr_ctx_mt *c) +{ + int exit_flag; + + pthread_rwlock_rdlock(&c->stop_lock); + exit_flag = c->exit_flag; + pthread_rwlock_unlock(&c->stop_lock); + + if (exit_flag) + pthread_exit(NULL); +} +#else +# define thread_loop_check_exit(s) +#endif /* __APPLE__ */ + +/* + * Helper function to terminate the helper threads + */ +static void +stop_and_join_pregen_threads(struct ssh_aes_ctr_ctx_mt *c) +{ + int i; + +#ifdef __APPLE__ + /* notify threads that they should exit */ + pthread_rwlock_wrlock(&c->stop_lock); + c->exit_flag = TRUE; + pthread_rwlock_unlock(&c->stop_lock); +#endif /* __APPLE__ */ + + /* Cancel pregen threads */ + for (i = 0; i < cipher_threads; i++) { + debug ("Canceled %lu (%d,%d)", c->tid[i], c->struct_id, c->id[i]); + pthread_cancel(c->tid[i]); + } + /* shouldn't need this - see commit logs for hpn-7_7_P1 -cjr 11/7/19*/ + /* for (i = 0; i < numkq; i++) { */ + /* pthread_mutex_lock(&c->q[i].lock); */ + /* pthread_cond_broadcast(&c->q[i].cond); */ + /* pthread_mutex_unlock(&c->q[i].lock); */ + /* } */ + for (i = 0; i < cipher_threads; i++) { + if (pthread_kill(c->tid[i], 0) != 0) + debug3("AES-CTR MT pthread_join failure: Invalid thread id %lu in %s", c->tid[i], __FUNCTION__); + else { + debug ("Joining %lu (%d, %d)", c->tid[i], c->struct_id, c->id[i]); + pthread_join(c->tid[i], NULL); + } + } +} + +/* + * The life of a pregen thread: + * Find empty keystream queues and fill them using their counter. + * When done, update counter for the next fill. 
+ */ +static void * +thread_loop(void *x) +{ + AES_KEY key; + STATS_STRUCT(stats); + struct ssh_aes_ctr_ctx_mt *c = x; + struct kq *q; + int i; + int qidx; + pthread_t first_tid; + + /* Threads stats on cancellation */ + STATS_INIT(stats); +#ifdef CIPHER_THREAD_STATS + pthread_cleanup_push(thread_loop_stats, &stats); +#endif + + /* Thread local copy of AES key */ + memcpy(&key, &c->aes_ctx, sizeof(key)); + + pthread_rwlock_rdlock(&c->tid_lock); + first_tid = c->tid[0]; + pthread_rwlock_unlock(&c->tid_lock); + + /* + * Handle the special case of startup, one thread must fill + * the first KQ then mark it as draining. Lock held throughout. + */ + if (pthread_equal(pthread_self(), first_tid)) { + q = &c->q[0]; + pthread_mutex_lock(&q->lock); + if (q->qstate == KQINIT) { + for (i = 0; i < KQLEN; i++) { + AES_encrypt(q->ctr, q->keys[i], &key); + ssh_ctr_inc(q->ctr, AES_BLOCK_SIZE); + } + ssh_ctr_add(q->ctr, KQLEN * (numkq - 1), AES_BLOCK_SIZE); + q->qstate = KQDRAINING; + STATS_FILL(stats); + pthread_cond_broadcast(&q->cond); + } + pthread_mutex_unlock(&q->lock); + } else + STATS_SKIP(stats); + + /* + * Normal case is to find empty queues and fill them, skipping over + * queues already filled by other threads and stopping to wait for + * a draining queue to become empty. + * + * Multiple threads may be waiting on a draining queue and awoken + * when empty. The first thread to wake will mark it as filling, + * others will move on to fill, skip, or wait on the next queue. 
+ */ + for (qidx = 1;; qidx = (qidx + 1) % numkq) { + /* Check if I was cancelled, also checked in cond_wait */ + pthread_testcancel(); + + /* Check if we should exit as well */ + thread_loop_check_exit(c); + + /* Lock queue and block if its draining */ + q = &c->q[qidx]; + pthread_mutex_lock(&q->lock); + pthread_cleanup_push(thread_loop_cleanup, &q->lock); + while (q->qstate == KQDRAINING || q->qstate == KQINIT) { + STATS_WAIT(stats); + thread_loop_check_exit(c); + pthread_cond_wait(&q->cond, &q->lock); + } + pthread_cleanup_pop(0); + + /* If filling or full, somebody else got it, skip */ + if (q->qstate != KQEMPTY) { + pthread_mutex_unlock(&q->lock); + STATS_SKIP(stats); + continue; + } + + /* + * Empty, let's fill it. + * Queue lock is relinquished while we do this so others + * can see that it's being filled. + */ + q->qstate = KQFILLING; + pthread_cond_broadcast(&q->cond); + pthread_mutex_unlock(&q->lock); + for (i = 0; i < KQLEN; i++) { + AES_encrypt(q->ctr, q->keys[i], &key); + ssh_ctr_inc(q->ctr, AES_BLOCK_SIZE); + } + + /* Re-lock, mark full and signal consumer */ + pthread_mutex_lock(&q->lock); + ssh_ctr_add(q->ctr, KQLEN * (numkq - 1), AES_BLOCK_SIZE); + q->qstate = KQFULL; + STATS_FILL(stats); + pthread_cond_broadcast(&q->cond); + pthread_mutex_unlock(&q->lock); + } + +#ifdef CIPHER_THREAD_STATS + /* Stats */ + pthread_cleanup_pop(1); +#endif + + return NULL; +} + +static int +ssh_aes_ctr(EVP_CIPHER_CTX *ctx, u_char *dest, const u_char *src, + LIBCRYPTO_EVP_INL_TYPE len) +{ + typedef union { +#ifdef CIPHER_INT128_OK + __uint128_t *u128; +#endif + uint64_t *u64; + uint32_t *u32; + uint8_t *u8; + const uint8_t *cu8; + uintptr_t u; + } ptrs_t; + ptrs_t destp, srcp, bufp; + uintptr_t align; + struct ssh_aes_ctr_ctx_mt *c; + struct kq *q, *oldq; + int ridx; + u_char *buf; + + if (len == 0) + return 1; + if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) == NULL) + return 0; + + q = &c->q[c->qidx]; + ridx = c->ridx; + + /* src already padded to block multiple */ + 
srcp.cu8 = src; + destp.u8 = dest; + while (len > 0) { + buf = q->keys[ridx]; + bufp.u8 = buf; + + /* figure out the alignment on the fly */ +#ifdef CIPHER_UNALIGNED_OK + align = 0; +#else + align = destp.u | srcp.u | bufp.u; +#endif + +#ifdef CIPHER_INT128_OK + if ((align & 0xf) == 0) { + destp.u128[0] = srcp.u128[0] ^ bufp.u128[0]; + } else +#endif + if ((align & 0x7) == 0) { + destp.u64[0] = srcp.u64[0] ^ bufp.u64[0]; + destp.u64[1] = srcp.u64[1] ^ bufp.u64[1]; + } else if ((align & 0x3) == 0) { + destp.u32[0] = srcp.u32[0] ^ bufp.u32[0]; + destp.u32[1] = srcp.u32[1] ^ bufp.u32[1]; + destp.u32[2] = srcp.u32[2] ^ bufp.u32[2]; + destp.u32[3] = srcp.u32[3] ^ bufp.u32[3]; + } else { + size_t i; + for (i = 0; i < AES_BLOCK_SIZE; ++i) + dest[i] = src[i] ^ buf[i]; + } + + destp.u += AES_BLOCK_SIZE; + srcp.u += AES_BLOCK_SIZE; + len -= AES_BLOCK_SIZE; + ssh_ctr_inc(c->aes_counter, AES_BLOCK_SIZE); + + /* Increment read index, switch queues on rollover */ + if ((ridx = (ridx + 1) % KQLEN) == 0) { + oldq = q; + + /* Mark next queue draining, may need to wait */ + c->qidx = (c->qidx + 1) % numkq; + q = &c->q[c->qidx]; + pthread_mutex_lock(&q->lock); + while (q->qstate != KQFULL) { + STATS_WAIT(c->stats); + pthread_cond_wait(&q->cond, &q->lock); + } + q->qstate = KQDRAINING; + pthread_cond_broadcast(&q->cond); + pthread_mutex_unlock(&q->lock); + + /* Mark consumed queue empty and signal producers */ + pthread_mutex_lock(&oldq->lock); + oldq->qstate = KQEMPTY; + STATS_DRAIN(c->stats); + pthread_cond_broadcast(&oldq->cond); + pthread_mutex_unlock(&oldq->lock); + } + } + c->ridx = ridx; + return 1; +} + +#define HAVE_NONE 0 +#define HAVE_KEY 1 +#define HAVE_IV 2 + +int X = 0; + +static int +ssh_aes_ctr_init(EVP_CIPHER_CTX *ctx, const u_char *key, const u_char *iv, + int enc) +{ + struct ssh_aes_ctr_ctx_mt *c; + int i; + + /* get the number of cores in the system */ + /* if it's not linux it currently defaults to 2 */ + /* divide by 2 to get threads for each direction 
(MODE_IN||MODE_OUT) */ +#ifdef __linux__ + cipher_threads = sysconf(_SC_NPROCESSORS_ONLN) / 2; +#endif /*__linux__*/ +#ifdef __APPLE__ + cipher_threads = sysconf(_SC_NPROCESSORS_ONLN) / 2; +#endif /*__APPLE__*/ +#ifdef __FREEBSD__ + int req[2]; + size_t len; + + req[0] = CTL_HW; + req[1] = HW_NCPU; + + len = sizeof(cipher_threads); + sysctl(req, 2, &cipher_threads, &len, NULL, 0); + cipher_threads = cipher_threads / 2; +#endif /*__FREEBSD__*/ + + /* if they have less than 4 cores spin up 4 threads anyway */ + if (cipher_threads < 2) + cipher_threads = 2; + + /* assure that we aren't trying to create more threads */ + /* than we have in the struct. cipher_threads is half the */ + /* total of allowable threads hence the odd looking math here */ + if (cipher_threads * 2 > MAX_THREADS) + cipher_threads = MAX_THREADS / 2; + + /* set the number of keystream queues */ + numkq = cipher_threads * 2; + + if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) == NULL) { + c = xmalloc(sizeof(*c)); + pthread_rwlock_init(&c->tid_lock, NULL); +#ifdef __APPLE__ + pthread_rwlock_init(&c->stop_lock, NULL); + c->exit_flag = FALSE; +#endif /* __APPLE__ */ + + c->state = HAVE_NONE; + for (i = 0; i < numkq; i++) { + pthread_mutex_init(&c->q[i].lock, NULL); + pthread_cond_init(&c->q[i].cond, NULL); + } + + STATS_INIT(c->stats); + EVP_CIPHER_CTX_set_app_data(ctx, c); + } + + if (c->state == (HAVE_KEY | HAVE_IV)) { + /* tell the pregen threads to exit */ + stop_and_join_pregen_threads(c); + +#ifdef __APPLE__ + /* reset the exit flag */ + c->exit_flag = FALSE; +#endif /* __APPLE__ */ + + /* Start over getting key & iv */ + c->state = HAVE_NONE; + } + + if (key != NULL) { + AES_set_encrypt_key(key, EVP_CIPHER_CTX_key_length(ctx) * 8, + &c->aes_ctx); + c->state |= HAVE_KEY; + } + + if (iv != NULL) { + memcpy(c->aes_counter, iv, AES_BLOCK_SIZE); + c->state |= HAVE_IV; + } + + if (c->state == (HAVE_KEY | HAVE_IV)) { + /* Clear queues */ + memcpy(c->q[0].ctr, c->aes_counter, AES_BLOCK_SIZE); + c->q[0].qstate = 
KQINIT; + for (i = 1; i < numkq; i++) { + memcpy(c->q[i].ctr, c->aes_counter, AES_BLOCK_SIZE); + ssh_ctr_add(c->q[i].ctr, i * KQLEN, AES_BLOCK_SIZE); + c->q[i].qstate = KQEMPTY; + } + c->qidx = 0; + c->ridx = 0; + + /* Start threads */ + for (i = 0; i < cipher_threads; i++) { + pthread_rwlock_wrlock(&c->tid_lock); + if (pthread_create(&c->tid[i], NULL, thread_loop, c) != 0) + debug ("AES-CTR MT Could not create thread in %s", __FUNCTION__); /*should die here */ + else { + if (!c->struct_id) + c->struct_id = X++; + c->id[i] = i; + debug ("AES-CTR MT spawned a thread with id %lu in %s (%d, %d)", c->tid[i], __FUNCTION__, c->struct_id, c->id[i]); + } + pthread_rwlock_unlock(&c->tid_lock); + } + pthread_mutex_lock(&c->q[0].lock); + while (c->q[0].qstate == KQINIT) + pthread_cond_wait(&c->q[0].cond, &c->q[0].lock); + pthread_mutex_unlock(&c->q[0].lock); + } + return 1; +} + +/* this function is no longer used but might prove handy in the future + * this comment also applies to ssh_aes_ctr_thread_reconstruction + */ +void +ssh_aes_ctr_thread_destroy(EVP_CIPHER_CTX *ctx) +{ + struct ssh_aes_ctr_ctx_mt *c; + + c = EVP_CIPHER_CTX_get_app_data(ctx); + stop_and_join_pregen_threads(c); +} + +void +ssh_aes_ctr_thread_reconstruction(EVP_CIPHER_CTX *ctx) +{ + struct ssh_aes_ctr_ctx_mt *c; + int i; + c = EVP_CIPHER_CTX_get_app_data(ctx); + /* reconstruct threads */ + for (i = 0; i < cipher_threads; i++) { + pthread_rwlock_wrlock(&c->tid_lock); + if (pthread_create(&c->tid[i], NULL, thread_loop, c) !=0 ) + debug("AES-CTR MT could not create thread in %s", __FUNCTION__); + else { + c->struct_id = X++; + c->id[i] = i; + debug ("AES-CTR MT spawned a thread with id %lu in %s (%d, %d)", c->tid[i], __FUNCTION__, c->struct_id, c->id[i]); + debug("AES-CTR MT spawned a thread with id %lu in %s", c->tid[i], __FUNCTION__); + } + pthread_rwlock_unlock(&c->tid_lock); + } +} + +static int +ssh_aes_ctr_cleanup(EVP_CIPHER_CTX *ctx) +{ + struct ssh_aes_ctr_ctx_mt *c; + + if ((c = 
EVP_CIPHER_CTX_get_app_data(ctx)) != NULL) { +#ifdef CIPHER_THREAD_STATS + debug("AES-CTR MT main thread: %u drains, %u waits", c->stats.drains, + c->stats.waits); +#endif + stop_and_join_pregen_threads(c); + + memset(c, 0, sizeof(*c)); + free(c); + EVP_CIPHER_CTX_set_app_data(ctx, NULL); + } + return 1; +} + +/* */ +const EVP_CIPHER * +evp_aes_ctr_mt(void) +{ +# if OPENSSL_VERSION_NUMBER >= 0x10100000UL + static EVP_CIPHER *aes_ctr; + aes_ctr = EVP_CIPHER_meth_new(NID_undef, 16/*block*/, 16/*key*/); + EVP_CIPHER_meth_set_iv_length(aes_ctr, AES_BLOCK_SIZE); + EVP_CIPHER_meth_set_init(aes_ctr, ssh_aes_ctr_init); + EVP_CIPHER_meth_set_cleanup(aes_ctr, ssh_aes_ctr_cleanup); + EVP_CIPHER_meth_set_do_cipher(aes_ctr, ssh_aes_ctr); +# ifndef SSH_OLD_EVP + EVP_CIPHER_meth_set_flags(aes_ctr, EVP_CIPH_CBC_MODE + | EVP_CIPH_VARIABLE_LENGTH + | EVP_CIPH_ALWAYS_CALL_INIT + | EVP_CIPH_CUSTOM_IV); +# endif /*SSH_OLD_EVP*/ + return (aes_ctr); +# else /*earlier versions of openssl*/ + static EVP_CIPHER aes_ctr; + memset(&aes_ctr, 0, sizeof(EVP_CIPHER)); + aes_ctr.nid = NID_undef; + aes_ctr.block_size = AES_BLOCK_SIZE; + aes_ctr.iv_len = AES_BLOCK_SIZE; + aes_ctr.key_len = 16; + aes_ctr.init = ssh_aes_ctr_init; + aes_ctr.cleanup = ssh_aes_ctr_cleanup; + aes_ctr.do_cipher = ssh_aes_ctr; +# ifndef SSH_OLD_EVP + aes_ctr.flags = EVP_CIPH_CBC_MODE | EVP_CIPH_VARIABLE_LENGTH | + EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CUSTOM_IV; +# endif /*SSH_OLD_EVP*/ + return &aes_ctr; +# endif /*OPENSSH_VERSION_NUMBER*/ +} + +#endif /* defined(WITH_OPENSSL) */ diff --git a/cipher.c b/cipher.c index 639511cf..555a3356 100644 --- a/cipher.c +++ b/cipher.c @@ -55,6 +55,9 @@ #define EVP_CIPHER_CTX void #endif +/* for multi-threaded aes-ctr cipher */ +extern const EVP_CIPHER *evp_aes_ctr_mt(void); + struct sshcipher_ctx { int plaintext; int encrypt; @@ -83,7 +86,7 @@ struct sshcipher { #endif }; -static const struct sshcipher ciphers[] = { +static struct sshcipher ciphers[] = { #ifdef WITH_OPENSSL #ifndef 
OPENSSL_NO_DES { "3des-cbc", 8, 24, 0, 0, CFLAG_CBC, EVP_des_ede3_cbc }, @@ -152,6 +155,29 @@ compression_alg_list(int compression) #endif } +/* used to get the cipher name so when force rekeying to handle the + * single to multithreaded ctr cipher swap we only rekey when appropriate + */ +const char * +cipher_ctx_name(const struct sshcipher_ctx *cc) +{ + return cc->cipher->name; +} + +/* in order to get around sandbox and forking issues with a threaded cipher + * we set the initial pre-auth aes-ctr cipher to the default OpenSSH cipher + * post auth we set them to the new evp as defined by cipher-ctr-mt + */ +#ifdef WITH_OPENSSL +void +cipher_reset_multithreaded(void) +{ + cipher_by_name("aes128-ctr")->evptype = evp_aes_ctr_mt; + cipher_by_name("aes192-ctr")->evptype = evp_aes_ctr_mt; + cipher_by_name("aes256-ctr")->evptype = evp_aes_ctr_mt; +} +#endif + u_int cipher_blocksize(const struct sshcipher *c) { @@ -201,10 +227,10 @@ cipher_ctx_is_plaintext(struct sshcipher_ctx *cc) return cc->plaintext; } -const struct sshcipher * +struct sshcipher * cipher_by_name(const char *name) { - const struct sshcipher *c; + struct sshcipher *c; for (c = ciphers; c->name != NULL; c++) if (strcmp(c->name, name) == 0) return c; diff --git a/cipher.h b/cipher.h index 1a591cd7..7bcc7fa3 100644 --- a/cipher.h +++ b/cipher.h @@ -50,7 +50,9 @@ struct sshcipher; struct sshcipher_ctx; -const struct sshcipher *cipher_by_name(const char *); +void ssh_aes_ctr_thread_destroy(EVP_CIPHER_CTX *ctx); // defined in cipher-ctr-mt.c +void ssh_aes_ctr_thread_reconstruction(EVP_CIPHER_CTX *ctx); +struct sshcipher *cipher_by_name(const char *); const char *cipher_warning_message(const struct sshcipher_ctx *); int ciphers_valid(const char *); char *cipher_alg_list(char, int); @@ -68,6 +70,8 @@ u_int cipher_seclen(const struct sshcipher *); u_int cipher_authlen(const struct sshcipher *); u_int cipher_ivlen(const struct sshcipher *); u_int cipher_is_cbc(const struct sshcipher *); +void 
cipher_reset_multithreaded(void); +const char *cipher_ctx_name(const struct sshcipher_ctx *); u_int cipher_ctx_is_plaintext(struct sshcipher_ctx *); diff --git a/packet.c b/packet.c index 4bd8b4ec..bd1e2c55 100644 --- a/packet.c +++ b/packet.c @@ -293,7 +293,7 @@ struct ssh * ssh_packet_set_connection(struct ssh *ssh, int fd_in, int fd_out) { struct session_state *state; - const struct sshcipher *none = cipher_by_name("none"); + struct sshcipher *none = cipher_by_name("none"); int r; if (none == NULL) { @@ -954,6 +954,14 @@ ssh_set_newkeys(struct ssh *ssh, int mode) return 0; } +/* this supports the forced rekeying required for the NONE cipher */ +int rekey_requested = 0; +void +packet_request_rekeying(void) +{ + rekey_requested = 1; +} + #define MAX_PACKETS (1U<<31) static int ssh_packet_need_rekeying(struct ssh *ssh, u_int outbound_packet_len) @@ -980,6 +988,13 @@ ssh_packet_need_rekeying(struct ssh *ssh, u_int outbound_packet_len) if (state->p_send.packets == 0 && state->p_read.packets == 0) return 0; + /* used to force rekeying when called for by the none + * cipher switch and aes-mt-ctr methods -cjr */ + if (rekey_requested == 1) { + rekey_requested = 0; + return 1; + } + /* Time-based rekeying */ if (state->rekey_interval != 0 && (int64_t)state->rekey_time + state->rekey_interval <= monotime()) @@ -2707,3 +2722,10 @@ sshpkt_add_padding(struct ssh *ssh, u_char pad) ssh->state->extra_pad = pad; return 0; } + +/* need this for the moment for the aes-ctr cipher */ +void * +ssh_packet_get_send_context(struct ssh *ssh) +{ + return ssh->state->send_context; +} diff --git a/packet.h b/packet.h index c2544bd9..87f43160 100644 --- a/packet.h +++ b/packet.h @@ -169,6 +169,9 @@ time_t ssh_packet_get_rekey_timeout(struct ssh *); void *ssh_packet_get_input(struct ssh *); void *ssh_packet_get_output(struct ssh *); +void *ssh_packet_get_receive_context(struct ssh *); +void *ssh_packet_get_send_context(struct ssh *); +void packet_request_rekeying(void); /* new API */ int 
sshpkt_start(struct ssh *ssh, u_char type); diff --git a/readconf.c b/readconf.c index 724974b7..808a8521 100644 --- a/readconf.c +++ b/readconf.c @@ -166,6 +166,7 @@ typedef enum { oHashKnownHosts, oTunnel, oTunnelDevice, oLocalCommand, oPermitLocalCommand, oRemoteCommand, + oDisableMTAES, oVisualHostKey, oKexAlgorithms, oIPQoS, oRequestTTY, oIgnoreUnknown, oProxyUseFdpass, oCanonicalDomains, oCanonicalizeHostname, oCanonicalizeMaxDots, @@ -293,6 +294,7 @@ static struct { { "localcommand", oLocalCommand }, { "permitlocalcommand", oPermitLocalCommand }, { "remotecommand", oRemoteCommand }, + { "disablemtaes", oDisableMTAES }, { "visualhostkey", oVisualHostKey }, { "kexalgorithms", oKexAlgorithms }, { "ipqos", oIPQoS }, @@ -1101,6 +1103,10 @@ parse_time: multistate_ptr = multistate_strict_hostkey; goto parse_multistate; + case oDisableMTAES: + intptr = &options->disable_multithreaded; + goto parse_flag; + case oCompression: intptr = &options->compression; multistate_ptr = multistate_compression; @@ -2272,6 +2278,7 @@ initialize_options(Options * options) options->revoked_host_keys = NULL; options->fingerprint_hash = -1; options->update_hostkeys = -1; + options->disable_multithreaded = -1; options->hostbased_accepted_algos = NULL; options->pubkey_accepted_algos = NULL; options->known_hosts_command = NULL; @@ -2467,6 +2474,10 @@ fill_default_options(Options * options) if (options->sk_provider == NULL) options->sk_provider = xstrdup("$SSH_SK_PROVIDER"); #endif + if (options->update_hostkeys == -1) + options->update_hostkeys = 0; + if (options->disable_multithreaded == -1) + options->disable_multithreaded = 0; /* Expand KEX name lists */ all_cipher = cipher_alg_list(',', 0); diff --git a/readconf.h b/readconf.h index 2fba866e..7f8f0227 100644 --- a/readconf.h +++ b/readconf.h @@ -120,6 +120,7 @@ typedef struct { int enable_ssh_keysign; int64_t rekey_limit; + int disable_multithreaded; /*disable multithreaded aes-ctr*/ int rekey_interval; int 
no_host_authentication_for_localhost; int identities_only; diff --git a/servconf.c b/servconf.c index 9695583a..09fe65c9 100644 --- a/servconf.c +++ b/servconf.c @@ -92,6 +92,7 @@ initialize_server_options(ServerOptions *options) /* Portable-specific options */ options->use_pam = -1; + options->disable_multithreaded = -1; /* Standard Options */ options->num_ports = 0; @@ -424,6 +425,8 @@ fill_default_server_options(ServerOptions *options) } if (options->permit_tun == -1) options->permit_tun = SSH_TUNMODE_NO; + if (options->disable_multithreaded == -1) + options->disable_multithreaded = 0; if (options->ip_qos_interactive == -1) options->ip_qos_interactive = IPTOS_DSCP_AF21; if (options->ip_qos_bulk == -1) @@ -487,6 +490,7 @@ typedef enum { sBadOption, /* == unknown option */ /* Portable-specific options */ sUsePAM, + sDisableMTAES, /* Standard Options */ sPort, sHostKeyFile, sLoginGraceTime, sPermitRootLogin, sLogFacility, sLogLevel, sLogVerbose, @@ -662,6 +666,7 @@ static struct { { "authorizedprincipalsfile", sAuthorizedPrincipalsFile, SSHCFG_ALL }, { "kexalgorithms", sKexAlgorithms, SSHCFG_GLOBAL }, { "include", sInclude, SSHCFG_ALL }, + { "disableMTAES", sDisableMTAES, SSHCFG_ALL }, { "ipqos", sIPQoS, SSHCFG_ALL }, { "authorizedkeyscommand", sAuthorizedKeysCommand, SSHCFG_ALL }, { "authorizedkeyscommanduser", sAuthorizedKeysCommandUser, SSHCFG_ALL }, @@ -2221,6 +2226,10 @@ process_server_config_line_depth(ServerOptions *options, char *line, } break; + case sDisableMTAES: + intptr = &options->disable_multithreaded; + goto parse_flag; + case sIPQoS: arg = strdelim(&cp); if ((value = parse_ipqos(arg)) == -1) diff --git a/servconf.h b/servconf.h index 4f4fd9ba..16875eb6 100644 --- a/servconf.h +++ b/servconf.h @@ -199,6 +199,8 @@ typedef struct { char *adm_forced_command; + int disable_multithreaded; /*disable multithreaded aes-ctr*/ + int use_pam; /* Enable auth via PAM */ int permit_tun; diff --git a/ssh.c b/ssh.c index 53330da5..54465b49 100644 --- a/ssh.c +++ 
b/ssh.c @@ -1740,6 +1740,8 @@ control_persist_detach(void) setproctitle("%s [mux]", options.control_path); } +extern const EVP_CIPHER *evp_aes_ctr_mt(void); + /* Do fork() after authentication. Used by "ssh -f" */ static void fork_postauth(void) diff --git a/sshconnect2.c b/sshconnect2.c index 059c9480..04ca0e2e 100644 --- a/sshconnect2.c +++ b/sshconnect2.c @@ -489,6 +489,26 @@ ssh_userauth2(struct ssh *ssh, const char *local_user, if (!authctxt.success) fatal("Authentication failed."); + +#ifdef WITH_OPENSSL + if (options.disable_multithreaded == 0) { + /* if we are using aes-ctr there can be issues in either a fork or sandbox + * so the initial aes-ctr is defined to point to the original single process + * evp. After authentication we'll be past the fork and the sandboxed privsep + * so we repoint the define to the multithreaded evp. To start the threads we + * then force a rekey + */ + const void *cc = ssh_packet_get_send_context(ssh); + + /* only do this for the ctr cipher. otherwise gcm mode breaks. Don't know why though */ + if (strstr(cipher_ctx_name(cc), "ctr")) { + debug("Single to Multithread CTR cipher swap - client request"); + cipher_reset_multithreaded(); + packet_request_rekeying(); + } + } +#endif + debug("Authentication succeeded (%s).", authctxt.method->name); } diff --git a/sshd.c b/sshd.c index 6277e6d6..bf3d6e4a 100644 --- a/sshd.c +++ b/sshd.c @@ -2273,6 +2273,25 @@ main(int ac, char **av) /* Try to send all our hostkeys to the client */ notify_hostkeys(ssh); +#ifdef WITH_OPENSSL + if (options.disable_multithreaded == 0) { + /* if we are using aes-ctr there can be issues in either a fork or sandbox + * so the initial aes-ctr is defined to point to the original single process + * evp. After authentication we'll be past the fork and the sandboxed privsep + * so we repoint the define to the multithreaded evp. 
To start the threads we + * then force a rekey + */ + const void *cc = ssh_packet_get_send_context(the_active_state); + + /* only rekey if necessary. If we don't do this gcm mode cipher breaks */ + if (strstr(cipher_ctx_name(cc), "ctr")) { + debug("Single to Multithreaded CTR cipher swap - server request"); + cipher_reset_multithreaded(); + packet_request_rekeying(); + } + } +#endif + /* Start session. */ do_authenticated(ssh, authctxt);