Browse Source

Replace the wolfssl Diffie Hellman implementation with mbedtls (#74)

* Switch to the mbedtls implementation of Diffie Hellman
Don Porter 6 years ago
parent
commit
8add0b94f7

+ 28 - 3
Pal/lib/Makefile

@@ -12,15 +12,40 @@ include ../src/host/$(PAL_HOST)/Makefile.am
 
 
 CFLAGS += -I. -I../include -I../src
 CFLAGS += -I. -I../include -I../src
 subdirs = string stdlib network graphene util crypto
 subdirs = string stdlib network graphene util crypto
-headers = api.h
+headers = api.h pal_crypto.h
+
+# Choose Crypto provider among (mbedtls|wolfssl)
+CRYPTO_PROVIDER ?= mbedtls
 
 
 # Select which crypto adpater you want to use here. This has to match
 # Select which crypto adpater you want to use here. This has to match
 # the #define in pal_crypto.h.
 # the #define in pal_crypto.h.
+#
+# Unfortunately, we cannot use just one .c file for the adapter. The LibOS
+# shim links against the crypto library, but it doesn't use Diffie-Hellman.
+# If the Diffie-Hellman stubs are in the same .o file as the SHA1 stubs,
+# this pulls Diffie-Hellman code into LibOS shim, resulting in unsatisfied
+# symbols.
+ifeq ($(CRYPTO_PROVIDER),mbedtls)
 subdirs += crypto/mbedtls
 subdirs += crypto/mbedtls
-#subdirs += crypto/wolfssl
+headers += $(wildcard crypto/mbedtls/mbedtls/*.h)
+endif
+ifeq ($(CRYPTO_PROVIDER),wolfssl)
+subdirs += crypto/wolfssl
+headers += $(wildcard crypto/wolfssl/*.h)
+endif
+
 objs	= $(foreach dir,$(subdirs),$(patsubst %.c,%.o,$(wildcard $(dir)/*.c)))
 objs	= $(foreach dir,$(subdirs),$(patsubst %.c,%.o,$(wildcard $(dir)/*.c)))
+
+ifeq ($(CRYPTO_PROVIDER),mbedtls)
+CFLAGS += -DCRYPTO_USE_MBEDTLS
 objs += crypto/adapters/mbedtls_adapter.o
 objs += crypto/adapters/mbedtls_adapter.o
-#objs += crypto/adapters/wolfssl_adapter.o
+objs += crypto/adapters/mbedtls_dh.o
+endif
+ifeq ($(CRYPTO_PROVIDER),wolfssl)
+CFLAGS += -DCRYPTO_USE_WOLFSSL
+objs += crypto/adapters/wolfssl_adapter.o
+objs += crypto/adapters/wolfssl_dh.o
+endif
 
 
 all: $(target)graphene-lib.a
 all: $(target)graphene-lib.a
 
 

+ 104 - 0
Pal/lib/crypto/adapters/mbedtls_dh.c

@@ -0,0 +1,104 @@
+/* Copyright (C) 2017 Fortanix, Inc.
+
+   This file is part of Graphene Library OS.
+
+   Graphene Library OS is free software: you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation, either version 3 of the
+   License, or (at your option) any later version.
+
+   Graphene Library OS is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "pal.h"
+#include "pal_crypto.h"
+#include "pal_error.h"
+#include "pal_debug.h"
+
+/* This is declared in pal_internal.h, but that can't be included here. */
+int _DkRandomBitsRead(void *buffer, int size);
+
+/* Wrapper to provide mbedtls the RNG interface it expects. It passes an
+ * extra context parameter, and expects a return value of 0 for success
+ * and nonzero for failure. */
+static int RandomWrapper(void *private, unsigned char *data, size_t size)
+{
+    return _DkRandomBitsRead(data, size) != size;
+}
+
+int lib_DhInit(LIB_DH_CONTEXT *context)
+{
+    int ret;
+    mbedtls_dhm_init(context);
+
+    /* Configure parameters. Note that custom Diffie-Hellman parameters 
+     * are considered more secure, but require more data be exchanged
+     * between the two parties to establish the parameters, so we haven't
+     * implemented that yet. */
+    ret = mbedtls_mpi_read_string(&context->P, 16 /* radix */,
+                                  MBEDTLS_DHM_RFC3526_MODP_2048_P);
+    if (ret != 0) {
+        pal_printf("D-H initialization failed: %d\n", ret);
+        return ret;
+    }
+
+    ret = mbedtls_mpi_read_string(&context->G, 16 /* radix */,
+                                  MBEDTLS_DHM_RFC3526_MODP_2048_G);
+    if (ret != 0) {
+        pal_printf("D-H initialization failed: %d\n", ret);
+        return ret;
+    }
+
+    context->len = mbedtls_mpi_size(&context->P);
+
+    return 0;
+}
+
+int lib_DhCreatePublic(LIB_DH_CONTEXT *context, uint8_t *public,
+                     uint64_t *public_size)
+{
+    int ret;
+
+    if (*public_size != DH_SIZE)
+        return -PAL_ERROR_INVAL;
+
+    /* The RNG here is used to generate secret exponent X. */
+    ret = mbedtls_dhm_make_public(context, context->len, public, *public_size,
+                                  RandomWrapper, NULL);
+    if (ret != 0)
+        return ret;
+
+    /* mbedtls writes leading zeros in the big-endian output to pad to
+     * public_size, so leave caller's public_size unchanged */
+    return 0;
+}
+
+int lib_DhCalcSecret(LIB_DH_CONTEXT *context, uint8_t *peer, uint64_t peer_size,
+                   uint8_t *secret, uint64_t *secret_size)
+{
+    int ret;
+
+    if (*secret_size != DH_SIZE)
+        return -PAL_ERROR_INVAL;
+
+    ret = mbedtls_dhm_read_public(context, peer, peer_size);
+    if (ret != 0)
+        return ret;
+
+    /* The RNG here is used for blinding against timing attacks if X is
+     * reused and not used otherwise. mbedtls recommends always passing
+     * in an RNG. */
+    return mbedtls_dhm_calc_secret(context, secret, *secret_size, secret_size,
+                                   RandomWrapper, NULL);
+}
+
+void lib_DhFinal(LIB_DH_CONTEXT *context)
+{
+    /* This call zeros out context for us. */
+    mbedtls_dhm_free(context);
+}

+ 122 - 0
Pal/lib/crypto/adapters/wolfssl_dh.c

@@ -0,0 +1,122 @@
+/* Copyright (C) 2017 Fortanix, Inc.
+
+   This file is part of Graphene Library OS.
+
+   Graphene Library OS is free software: you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation, either version 3 of the
+   License, or (at your option) any later version.
+
+   Graphene Library OS is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "api.h"
+#include "pal.h"
+#include "pal_error.h"
+#include "pal_crypto.h"
+
+static struct {
+    uint8_t p[DH_SIZE], q[20], g[DH_SIZE];
+} dh_param = {
+    {
+        0xfd, 0x7f, 0x53, 0x81, 0x1d, 0x75, 0x12, 0x29,
+        0x52, 0xdf, 0x4a, 0x9c, 0x2e, 0xec, 0xe4, 0xe7,
+        0xf6, 0x11, 0xb7, 0x52, 0x3c, 0xef, 0x44, 0x00,
+        0xc3, 0x1e, 0x3f, 0x80, 0xb6, 0x51, 0x26, 0x69,
+        0x45, 0x5d, 0x40, 0x22, 0x51, 0xfb, 0x59, 0x3d,
+        0x8d, 0x58, 0xfa, 0xbf, 0xc5, 0xf5, 0xba, 0x30,
+        0xf6, 0xcb, 0x9b, 0x55, 0x6c, 0xd7, 0x81, 0x3b,
+        0x80, 0x1d, 0x34, 0x6f, 0xf2, 0x66, 0x60, 0xb7,
+        0x6b, 0x99, 0x50, 0xa5, 0xa4, 0x9f, 0x9f, 0xe8,
+        0x04, 0x7b, 0x10, 0x22, 0xc2, 0x4f, 0xbb, 0xa9,
+        0xd7, 0xfe, 0xb7, 0xc6, 0x1b, 0xf8, 0x3b, 0x57,
+        0xe7, 0xc6, 0xa8, 0xa6, 0x15, 0x0f, 0x04, 0xfb,
+        0x83, 0xf6, 0xd3, 0xc5, 0x1e, 0xc3, 0x02, 0x35,
+        0x54, 0x13, 0x5a, 0x16, 0x91, 0x32, 0xf6, 0x75,
+        0xf3, 0xae, 0x2b, 0x61, 0xd7, 0x2a, 0xef, 0xf2,
+        0x22, 0x03, 0x19, 0x9d, 0xd1, 0x48, 0x01, 0xc7,
+    },
+
+    {
+        0x97, 0x60, 0x50, 0x8f, 0x15, 0x23, 0x0b, 0xcc,
+        0xb2, 0x92, 0xb9, 0x82, 0xa2, 0xeb, 0x84, 0x0b,
+        0xf0, 0x58, 0x1c, 0xf5,
+    },
+
+    {
+        0xf7, 0xe1, 0xa0, 0x85, 0xd6, 0x9b, 0x3d, 0xde,
+        0xcb, 0xbc, 0xab, 0x5c, 0x36, 0xb8, 0x57, 0xb9,
+        0x79, 0x94, 0xaf, 0xbb, 0xfa, 0x3a, 0xea, 0x82,
+        0xf9, 0x57, 0x4c, 0x0b, 0x3d, 0x07, 0x82, 0x67,
+        0x51, 0x59, 0x57, 0x8e, 0xba, 0xd4, 0x59, 0x4f,
+        0xe6, 0x71, 0x07, 0x10, 0x81, 0x80, 0xb4, 0x49,
+        0x16, 0x71, 0x23, 0xe8, 0x4c, 0x28, 0x16, 0x13,
+        0xb7, 0xcf, 0x09, 0x32, 0x8c, 0xc8, 0xa6, 0xe1,
+        0x3c, 0x16, 0x7a, 0x8b, 0x54, 0x7c, 0x8d, 0x28,
+        0xe0, 0xa3, 0xae, 0x1e, 0x2b, 0xb3, 0xa6, 0x75,
+        0x91, 0x6e, 0xa3, 0x7f, 0x0b, 0xfa, 0x21, 0x35,
+        0x62, 0xf1, 0xfb, 0x62, 0x7a, 0x01, 0x24, 0x3b,
+        0xcc, 0xa4, 0xf1, 0xbe, 0xa8, 0x51, 0x90, 0x89,
+        0xa8, 0x83, 0xdf, 0xe1, 0x5a, 0xe5, 0x9f, 0x06,
+        0x92, 0x8b, 0x66, 0x5e, 0x80, 0x7b, 0x55, 0x25,
+        0x64, 0x01, 0x4c, 0x3b, 0xfe, 0xcf, 0x49, 0x2a,
+    },
+};
+
+int lib_DhInit(LIB_DH_CONTEXT *context)
+{
+    memset(context, 0, sizeof *context);
+    InitDhKey(&context->key);
+    return DhSetKey(&context->key, dh_param.p, sizeof dh_param.p,
+                    dh_param.g, sizeof dh_param.g);
+}
+
+int lib_DhCreatePublic(LIB_DH_CONTEXT *context, uint8_t *public,
+                       uint64_t *_public_size)
+{
+    uint32_t public_size;
+    int ret;
+
+    if (*_public_size != DH_SIZE)
+        return -PAL_ERROR_INVAL;
+
+    public_size = (uint32_t) *_public_size;
+    ret = DhGenerateKeyPair(&context->key, context->priv, &context->priv_size,
+                            public, &public_size);
+    *_public_size = public_size;
+    return ret;
+}
+
+int lib_DhCalcSecret(LIB_DH_CONTEXT *context, uint8_t *peer, uint64_t peer_size,
+                     uint8_t *secret, uint64_t *_secret_size)
+{
+    int ret;
+    uint32_t secret_size;
+
+    if (peer_size > DH_SIZE)
+        return -PAL_ERROR_INVAL;
+
+    if (*_secret_size != DH_SIZE)
+        return -PAL_ERROR_INVAL;
+
+    secret_size = (uint32_t) *_secret_size;
+
+    ret = DhAgree(&context->key, secret, &secret_size, context->priv,
+                  context->priv_size, peer, (uint32_t) peer_size);
+    *_secret_size = secret_size;
+    return ret;
+}
+
+void lib_DhFinal(LIB_DH_CONTEXT *context)
+{
+    /* Frees memory associated with the bignums. */
+    FreeDhKey(&context->key);
+
+    /* Clear the buffer to avoid any potential information leaks. */
+    memset(context, 0, sizeof *context);
+}

+ 2445 - 0
Pal/lib/crypto/mbedtls/bignum.c

@@ -0,0 +1,2445 @@
+/*
+ *  Multi-precision integer library
+ *
+ *  Copyright (C) 2006-2015, ARM Limited, All Rights Reserved
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  This file is part of mbed TLS (https://tls.mbed.org)
+ */
+
+/*
+ *  The following sources were referenced in the design of this Multi-precision
+ *  Integer library:
+ *
+ *  [1] Handbook of Applied Cryptography - 1997
+ *      Menezes, van Oorschot and Vanstone
+ *
+ *  [2] Multi-Precision Math
+ *      Tom St Denis
+ *      https://github.com/libtom/libtommath/blob/develop/tommath.pdf
+ *
+ *  [3] GNU Multi-Precision Arithmetic Library
+ *      https://gmplib.org/manual/index.html
+ *
+ */
+
+#if !defined(MBEDTLS_CONFIG_FILE)
+#include "mbedtls/config.h"
+#else
+#include MBEDTLS_CONFIG_FILE
+#endif
+
+#if defined(MBEDTLS_BIGNUM_C)
+
+#include "mbedtls/bignum.h"
+#include "mbedtls/bn_mul.h"
+
+#if defined(MBEDTLS_PLATFORM_C)
+#include "mbedtls/platform.h"
+#else
+#include <stdio.h>
+#include <stdlib.h>
+#define mbedtls_printf     printf
+#define mbedtls_calloc    calloc
+#define mbedtls_free       free
+#endif
+
+/* Implementation that should never be optimized out by the compiler */
+static void mbedtls_mpi_zeroize( mbedtls_mpi_uint *v, size_t n ) {
+    volatile mbedtls_mpi_uint *p = v; while( n-- ) *p++ = 0;
+}
+
+#define ciL    (sizeof(mbedtls_mpi_uint))         /* chars in limb  */
+#define biL    (ciL << 3)               /* bits  in limb  */
+#define biH    (ciL << 2)               /* half limb size */
+
+#define MPI_SIZE_T_MAX  ( (size_t) -1 ) /* SIZE_T_MAX is not standard */
+
+/*
+ * Convert between bits/chars and number of limbs
+ * Divide first in order to avoid potential overflows
+ */
+#define BITS_TO_LIMBS(i)  ( (i) / biL + ( (i) % biL != 0 ) )
+#define CHARS_TO_LIMBS(i) ( (i) / ciL + ( (i) % ciL != 0 ) )
+
+/*
+ * Initialize one MPI
+ */
+void mbedtls_mpi_init( mbedtls_mpi *X )
+{
+    if( X == NULL )
+        return;
+
+    X->s = 1;
+    X->n = 0;
+    X->p = NULL;
+}
+
+/*
+ * Unallocate one MPI
+ */
+void mbedtls_mpi_free( mbedtls_mpi *X )
+{
+    if( X == NULL )
+        return;
+
+    if( X->p != NULL )
+    {
+        mbedtls_mpi_zeroize( X->p, X->n );
+        mbedtls_free( X->p );
+    }
+
+    X->s = 1;
+    X->n = 0;
+    X->p = NULL;
+}
+
+/*
+ * Enlarge to the specified number of limbs
+ */
+int mbedtls_mpi_grow( mbedtls_mpi *X, size_t nblimbs )
+{
+    mbedtls_mpi_uint *p;
+
+    if( nblimbs > MBEDTLS_MPI_MAX_LIMBS )
+        return( MBEDTLS_ERR_MPI_ALLOC_FAILED );
+
+    if( X->n < nblimbs )
+    {
+        if( ( p = (mbedtls_mpi_uint*)mbedtls_calloc( nblimbs, ciL ) ) == NULL )
+            return( MBEDTLS_ERR_MPI_ALLOC_FAILED );
+
+        if( X->p != NULL )
+        {
+            memcpy( p, X->p, X->n * ciL );
+            mbedtls_mpi_zeroize( X->p, X->n );
+            mbedtls_free( X->p );
+        }
+
+        X->n = nblimbs;
+        X->p = p;
+    }
+
+    return( 0 );
+}
+
+/*
+ * Resize down as much as possible,
+ * while keeping at least the specified number of limbs
+ */
+int mbedtls_mpi_shrink( mbedtls_mpi *X, size_t nblimbs )
+{
+    mbedtls_mpi_uint *p;
+    size_t i;
+
+    /* Actually resize up in this case */
+    if( X->n <= nblimbs )
+        return( mbedtls_mpi_grow( X, nblimbs ) );
+
+    for( i = X->n - 1; i > 0; i-- )
+        if( X->p[i] != 0 )
+            break;
+    i++;
+
+    if( i < nblimbs )
+        i = nblimbs;
+
+    if( ( p = (mbedtls_mpi_uint*)mbedtls_calloc( i, ciL ) ) == NULL )
+        return( MBEDTLS_ERR_MPI_ALLOC_FAILED );
+
+    if( X->p != NULL )
+    {
+        memcpy( p, X->p, i * ciL );
+        mbedtls_mpi_zeroize( X->p, X->n );
+        mbedtls_free( X->p );
+    }
+
+    X->n = i;
+    X->p = p;
+
+    return( 0 );
+}
+
+/*
+ * Copy the contents of Y into X
+ */
+int mbedtls_mpi_copy( mbedtls_mpi *X, const mbedtls_mpi *Y )
+{
+    int ret;
+    size_t i;
+
+    if( X == Y )
+        return( 0 );
+
+    if( Y->p == NULL )
+    {
+        mbedtls_mpi_free( X );
+        return( 0 );
+    }
+
+    for( i = Y->n - 1; i > 0; i-- )
+        if( Y->p[i] != 0 )
+            break;
+    i++;
+
+    X->s = Y->s;
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, i ) );
+
+    memset( X->p, 0, X->n * ciL );
+    memcpy( X->p, Y->p, i * ciL );
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Swap the contents of X and Y
+ */
+void mbedtls_mpi_swap( mbedtls_mpi *X, mbedtls_mpi *Y )
+{
+    mbedtls_mpi T;
+
+    memcpy( &T,  X, sizeof( mbedtls_mpi ) );
+    memcpy(  X,  Y, sizeof( mbedtls_mpi ) );
+    memcpy(  Y, &T, sizeof( mbedtls_mpi ) );
+}
+
+/*
+ * Conditionally assign X = Y, without leaking information
+ * about whether the assignment was made or not.
+ * (Leaking information about the respective sizes of X and Y is ok however.)
+ */
+int mbedtls_mpi_safe_cond_assign( mbedtls_mpi *X, const mbedtls_mpi *Y, unsigned char assign )
+{
+    int ret = 0;
+    size_t i;
+
+    /* make sure assign is 0 or 1 in a time-constant manner */
+    assign = (assign | (unsigned char)-assign) >> 7;
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, Y->n ) );
+
+    X->s = X->s * ( 1 - assign ) + Y->s * assign;
+
+    for( i = 0; i < Y->n; i++ )
+        X->p[i] = X->p[i] * ( 1 - assign ) + Y->p[i] * assign;
+
+    for( ; i < X->n; i++ )
+        X->p[i] *= ( 1 - assign );
+
+cleanup:
+    return( ret );
+}
+
+/*
+ * Conditionally swap X and Y, without leaking information
+ * about whether the swap was made or not.
+ * Here it is not ok to simply swap the pointers, which whould lead to
+ * different memory access patterns when X and Y are used afterwards.
+ */
+int mbedtls_mpi_safe_cond_swap( mbedtls_mpi *X, mbedtls_mpi *Y, unsigned char swap )
+{
+    int ret, s;
+    size_t i;
+    mbedtls_mpi_uint tmp;
+
+    if( X == Y )
+        return( 0 );
+
+    /* make sure swap is 0 or 1 in a time-constant manner */
+    swap = (swap | (unsigned char)-swap) >> 7;
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, Y->n ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( Y, X->n ) );
+
+    s = X->s;
+    X->s = X->s * ( 1 - swap ) + Y->s * swap;
+    Y->s = Y->s * ( 1 - swap ) +    s * swap;
+
+
+    for( i = 0; i < X->n; i++ )
+    {
+        tmp = X->p[i];
+        X->p[i] = X->p[i] * ( 1 - swap ) + Y->p[i] * swap;
+        Y->p[i] = Y->p[i] * ( 1 - swap ) +     tmp * swap;
+    }
+
+cleanup:
+    return( ret );
+}
+
+/*
+ * Set value from integer
+ */
+int mbedtls_mpi_lset( mbedtls_mpi *X, mbedtls_mpi_sint z )
+{
+    int ret;
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, 1 ) );
+    memset( X->p, 0, X->n * ciL );
+
+    X->p[0] = ( z < 0 ) ? -z : z;
+    X->s    = ( z < 0 ) ? -1 : 1;
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Get a specific bit
+ */
+int mbedtls_mpi_get_bit( const mbedtls_mpi *X, size_t pos )
+{
+    if( X->n * biL <= pos )
+        return( 0 );
+
+    return( ( X->p[pos / biL] >> ( pos % biL ) ) & 0x01 );
+}
+
+/*
+ * Set a bit to a specific value of 0 or 1
+ */
+int mbedtls_mpi_set_bit( mbedtls_mpi *X, size_t pos, unsigned char val )
+{
+    int ret = 0;
+    size_t off = pos / biL;
+    size_t idx = pos % biL;
+
+    if( val != 0 && val != 1 )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+
+    if( X->n * biL <= pos )
+    {
+        if( val == 0 )
+            return( 0 );
+
+        MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, off + 1 ) );
+    }
+
+    X->p[off] &= ~( (mbedtls_mpi_uint) 0x01 << idx );
+    X->p[off] |= (mbedtls_mpi_uint) val << idx;
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Return the number of less significant zero-bits
+ */
+size_t mbedtls_mpi_lsb( const mbedtls_mpi *X )
+{
+    size_t i, j, count = 0;
+
+    for( i = 0; i < X->n; i++ )
+        for( j = 0; j < biL; j++, count++ )
+            if( ( ( X->p[i] >> j ) & 1 ) != 0 )
+                return( count );
+
+    return( 0 );
+}
+
+/*
+ * Count leading zero bits in a given integer
+ */
+static size_t mbedtls_clz( const mbedtls_mpi_uint x )
+{
+    size_t j;
+    mbedtls_mpi_uint mask = (mbedtls_mpi_uint) 1 << (biL - 1);
+
+    for( j = 0; j < biL; j++ )
+    {
+        if( x & mask ) break;
+
+        mask >>= 1;
+    }
+
+    return j;
+}
+
+/*
+ * Return the number of bits
+ */
+size_t mbedtls_mpi_bitlen( const mbedtls_mpi *X )
+{
+    size_t i, j;
+
+    if( X->n == 0 )
+        return( 0 );
+
+    for( i = X->n - 1; i > 0; i-- )
+        if( X->p[i] != 0 )
+            break;
+
+    j = biL - mbedtls_clz( X->p[i] );
+
+    return( ( i * biL ) + j );
+}
+
+/*
+ * Return the total size in bytes
+ */
+size_t mbedtls_mpi_size( const mbedtls_mpi *X )
+{
+    return( ( mbedtls_mpi_bitlen( X ) + 7 ) >> 3 );
+}
+
+/*
+ * Convert an ASCII character to digit value
+ */
+static int mpi_get_digit( mbedtls_mpi_uint *d, int radix, char c )
+{
+    *d = 255;
+
+    if( c >= 0x30 && c <= 0x39 ) *d = c - 0x30;
+    if( c >= 0x41 && c <= 0x46 ) *d = c - 0x37;
+    if( c >= 0x61 && c <= 0x66 ) *d = c - 0x57;
+
+    if( *d >= (mbedtls_mpi_uint) radix )
+        return( MBEDTLS_ERR_MPI_INVALID_CHARACTER );
+
+    return( 0 );
+}
+
+/*
+ * Import from an ASCII string
+ */
+int mbedtls_mpi_read_string( mbedtls_mpi *X, int radix, const char *s )
+{
+    int ret;
+    size_t i, j, slen, n;
+    mbedtls_mpi_uint d;
+    mbedtls_mpi T;
+
+    if( radix < 2 || radix > 16 )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+
+    mbedtls_mpi_init( &T );
+
+    slen = strlen( s );
+
+    if( radix == 16 )
+    {
+        if( slen > MPI_SIZE_T_MAX >> 2 )
+            return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+
+        n = BITS_TO_LIMBS( slen << 2 );
+
+        MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, n ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) );
+
+        for( i = slen, j = 0; i > 0; i--, j++ )
+        {
+            if( i == 1 && s[i - 1] == '-' )
+            {
+                X->s = -1;
+                break;
+            }
+
+            MBEDTLS_MPI_CHK( mpi_get_digit( &d, radix, s[i - 1] ) );
+            X->p[j / ( 2 * ciL )] |= d << ( ( j % ( 2 * ciL ) ) << 2 );
+        }
+    }
+    else
+    {
+        MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) );
+
+        for( i = 0; i < slen; i++ )
+        {
+            if( i == 0 && s[i] == '-' )
+            {
+                X->s = -1;
+                continue;
+            }
+
+            MBEDTLS_MPI_CHK( mpi_get_digit( &d, radix, s[i] ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( &T, X, radix ) );
+
+            if( X->s == 1 )
+            {
+                MBEDTLS_MPI_CHK( mbedtls_mpi_add_int( X, &T, d ) );
+            }
+            else
+            {
+                MBEDTLS_MPI_CHK( mbedtls_mpi_sub_int( X, &T, d ) );
+            }
+        }
+    }
+
+cleanup:
+
+    mbedtls_mpi_free( &T );
+
+    return( ret );
+}
+
+/*
+ * Helper to write the digits high-order first
+ */
+static int mpi_write_hlp( mbedtls_mpi *X, int radix, char **p )
+{
+    int ret;
+    mbedtls_mpi_uint r;
+
+    if( radix < 2 || radix > 16 )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_mod_int( &r, X, radix ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_div_int( X, NULL, X, radix ) );
+
+    if( mbedtls_mpi_cmp_int( X, 0 ) != 0 )
+        MBEDTLS_MPI_CHK( mpi_write_hlp( X, radix, p ) );
+
+    if( r < 10 )
+        *(*p)++ = (char)( r + 0x30 );
+    else
+        *(*p)++ = (char)( r + 0x37 );
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Export into an ASCII string
+ */
+int mbedtls_mpi_write_string( const mbedtls_mpi *X, int radix,
+                              char *buf, size_t buflen, size_t *olen )
+{
+    int ret = 0;
+    size_t n;
+    char *p;
+    mbedtls_mpi T;
+
+    if( radix < 2 || radix > 16 )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+
+    n = mbedtls_mpi_bitlen( X );
+    if( radix >=  4 ) n >>= 1;
+    if( radix >= 16 ) n >>= 1;
+    /*
+     * Round up the buffer length to an even value to ensure that there is
+     * enough room for hexadecimal values that can be represented in an odd
+     * number of digits.
+     */
+    n += 3 + ( ( n + 1 ) & 1 );
+
+    if( buflen < n )
+    {
+        *olen = n;
+        return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL );
+    }
+
+    p = buf;
+    mbedtls_mpi_init( &T );
+
+    if( X->s == -1 )
+        *p++ = '-';
+
+    if( radix == 16 )
+    {
+        int c;
+        size_t i, j, k;
+
+        for( i = X->n, k = 0; i > 0; i-- )
+        {
+            for( j = ciL; j > 0; j-- )
+            {
+                c = ( X->p[i - 1] >> ( ( j - 1 ) << 3) ) & 0xFF;
+
+                if( c == 0 && k == 0 && ( i + j ) != 2 )
+                    continue;
+
+                *(p++) = "0123456789ABCDEF" [c / 16];
+                *(p++) = "0123456789ABCDEF" [c % 16];
+                k = 1;
+            }
+        }
+    }
+    else
+    {
+        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &T, X ) );
+
+        if( T.s == -1 )
+            T.s = 1;
+
+        MBEDTLS_MPI_CHK( mpi_write_hlp( &T, radix, &p ) );
+    }
+
+    *p++ = '\0';
+    *olen = p - buf;
+
+cleanup:
+
+    mbedtls_mpi_free( &T );
+
+    return( ret );
+}
+
+#if defined(MBEDTLS_FS_IO)
+/*
+ * Read X from an opened file
+ */
+int mbedtls_mpi_read_file( mbedtls_mpi *X, int radix, FILE *fin )
+{
+    mbedtls_mpi_uint d;
+    size_t slen;
+    char *p;
+    /*
+     * Buffer should have space for (short) label and decimal formatted MPI,
+     * newline characters and '\0'
+     */
+    char s[ MBEDTLS_MPI_RW_BUFFER_SIZE ];
+
+    memset( s, 0, sizeof( s ) );
+    if( fgets( s, sizeof( s ) - 1, fin ) == NULL )
+        return( MBEDTLS_ERR_MPI_FILE_IO_ERROR );
+
+    slen = strlen( s );
+    if( slen == sizeof( s ) - 2 )
+        return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL );
+
+    if( s[slen - 1] == '\n' ) { slen--; s[slen] = '\0'; }
+    if( s[slen - 1] == '\r' ) { slen--; s[slen] = '\0'; }
+
+    p = s + slen;
+    while( --p >= s )
+        if( mpi_get_digit( &d, radix, *p ) != 0 )
+            break;
+
+    return( mbedtls_mpi_read_string( X, radix, p + 1 ) );
+}
+
+/*
+ * Write X into an opened file (or stdout if fout == NULL)
+ */
+int mbedtls_mpi_write_file( const char *p, const mbedtls_mpi *X, int radix, FILE *fout )
+{
+    int ret;
+    size_t n, slen, plen;
+    /*
+     * Buffer should have space for (short) label and decimal formatted MPI,
+     * newline characters and '\0'
+     */
+    char s[ MBEDTLS_MPI_RW_BUFFER_SIZE ];
+
+    memset( s, 0, sizeof( s ) );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_write_string( X, radix, s, sizeof( s ) - 2, &n ) );
+
+    if( p == NULL ) p = "";
+
+    plen = strlen( p );
+    slen = strlen( s );
+    s[slen++] = '\r';
+    s[slen++] = '\n';
+
+    if( fout != NULL )
+    {
+        if( fwrite( p, 1, plen, fout ) != plen ||
+            fwrite( s, 1, slen, fout ) != slen )
+            return( MBEDTLS_ERR_MPI_FILE_IO_ERROR );
+    }
+    else
+        mbedtls_printf( "%s%s", p, s );
+
+cleanup:
+
+    return( ret );
+}
+#endif /* MBEDTLS_FS_IO */
+
+/*
+ * Import X from unsigned binary data, big endian
+ */
+int mbedtls_mpi_read_binary( mbedtls_mpi *X, const unsigned char *buf, size_t buflen )
+{
+    int ret;
+    size_t i, j, n;
+
+    for( n = 0; n < buflen; n++ )
+        if( buf[n] != 0 )
+            break;
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, CHARS_TO_LIMBS( buflen - n ) ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) );
+
+    for( i = buflen, j = 0; i > n; i--, j++ )
+        X->p[j / ciL] |= ((mbedtls_mpi_uint) buf[i - 1]) << ((j % ciL) << 3);
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Export X into unsigned binary data, big endian
+ */
+int mbedtls_mpi_write_binary( const mbedtls_mpi *X, unsigned char *buf, size_t buflen )
+{
+    size_t i, j, n;
+
+    n = mbedtls_mpi_size( X );
+
+    if( buflen < n )
+        return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL );
+
+    memset( buf, 0, buflen );
+
+    for( i = buflen - 1, j = 0; n > 0; i--, j++, n-- )
+        buf[i] = (unsigned char)( X->p[j / ciL] >> ((j % ciL) << 3) );
+
+    return( 0 );
+}
+
+/*
+ * Left-shift: X <<= count
+ */
+int mbedtls_mpi_shift_l( mbedtls_mpi *X, size_t count )
+{
+    int ret;
+    size_t i, v0, t1;
+    mbedtls_mpi_uint r0 = 0, r1;
+
+    v0 = count / (biL    );
+    t1 = count & (biL - 1);
+
+    i = mbedtls_mpi_bitlen( X ) + count;
+
+    if( X->n * biL < i )
+        MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, BITS_TO_LIMBS( i ) ) );
+
+    ret = 0;
+
+    /*
+     * shift by count / limb_size
+     */
+    if( v0 > 0 )
+    {
+        for( i = X->n; i > v0; i-- )
+            X->p[i - 1] = X->p[i - v0 - 1];
+
+        for( ; i > 0; i-- )
+            X->p[i - 1] = 0;
+    }
+
+    /*
+     * shift by count % limb_size
+     */
+    if( t1 > 0 )
+    {
+        for( i = v0; i < X->n; i++ )
+        {
+            r1 = X->p[i] >> (biL - t1);
+            X->p[i] <<= t1;
+            X->p[i] |= r0;
+            r0 = r1;
+        }
+    }
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Right-shift: X >>= count
+ */
+int mbedtls_mpi_shift_r( mbedtls_mpi *X, size_t count )
+{
+    size_t i, v0, v1;
+    mbedtls_mpi_uint r0 = 0, r1;
+
+    v0 = count /  biL;
+    v1 = count & (biL - 1);
+
+    if( v0 > X->n || ( v0 == X->n && v1 > 0 ) )
+        return mbedtls_mpi_lset( X, 0 );
+
+    /*
+     * shift by count / limb_size
+     */
+    if( v0 > 0 )
+    {
+        for( i = 0; i < X->n - v0; i++ )
+            X->p[i] = X->p[i + v0];
+
+        for( ; i < X->n; i++ )
+            X->p[i] = 0;
+    }
+
+    /*
+     * shift by count % limb_size
+     */
+    if( v1 > 0 )
+    {
+        for( i = X->n; i > 0; i-- )
+        {
+            r1 = X->p[i - 1] << (biL - v1);
+            X->p[i - 1] >>= v1;
+            X->p[i - 1] |= r0;
+            r0 = r1;
+        }
+    }
+
+    return( 0 );
+}
+
+/*
+ * Compare unsigned values
+ */
+int mbedtls_mpi_cmp_abs( const mbedtls_mpi *X, const mbedtls_mpi *Y )
+{
+    size_t i, j;
+
+    for( i = X->n; i > 0; i-- )
+        if( X->p[i - 1] != 0 )
+            break;
+
+    for( j = Y->n; j > 0; j-- )
+        if( Y->p[j - 1] != 0 )
+            break;
+
+    if( i == 0 && j == 0 )
+        return( 0 );
+
+    if( i > j ) return(  1 );
+    if( j > i ) return( -1 );
+
+    for( ; i > 0; i-- )
+    {
+        if( X->p[i - 1] > Y->p[i - 1] ) return(  1 );
+        if( X->p[i - 1] < Y->p[i - 1] ) return( -1 );
+    }
+
+    return( 0 );
+}
+
+/*
+ * Compare signed values
+ */
+int mbedtls_mpi_cmp_mpi( const mbedtls_mpi *X, const mbedtls_mpi *Y )
+{
+    size_t i, j;
+
+    for( i = X->n; i > 0; i-- )
+        if( X->p[i - 1] != 0 )
+            break;
+
+    for( j = Y->n; j > 0; j-- )
+        if( Y->p[j - 1] != 0 )
+            break;
+
+    if( i == 0 && j == 0 )
+        return( 0 );
+
+    if( i > j ) return(  X->s );
+    if( j > i ) return( -Y->s );
+
+    if( X->s > 0 && Y->s < 0 ) return(  1 );
+    if( Y->s > 0 && X->s < 0 ) return( -1 );
+
+    for( ; i > 0; i-- )
+    {
+        if( X->p[i - 1] > Y->p[i - 1] ) return(  X->s );
+        if( X->p[i - 1] < Y->p[i - 1] ) return( -X->s );
+    }
+
+    return( 0 );
+}
+
+/*
+ * Compare signed values
+ */
+int mbedtls_mpi_cmp_int( const mbedtls_mpi *X, mbedtls_mpi_sint z )
+{
+    mbedtls_mpi Y;
+    mbedtls_mpi_uint p[1];
+
+    *p  = ( z < 0 ) ? -z : z;
+    Y.s = ( z < 0 ) ? -1 : 1;
+    Y.n = 1;
+    Y.p = p;
+
+    return( mbedtls_mpi_cmp_mpi( X, &Y ) );
+}
+
+/*
+ * Unsigned addition: X = |A| + |B|  (HAC 14.7)
+ */
+int mbedtls_mpi_add_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B )
+{
+    int ret;
+    size_t i, j;
+    mbedtls_mpi_uint *o, *p, c, tmp;
+
+    if( X == B )
+    {
+        const mbedtls_mpi *T = A; A = X; B = T;
+    }
+
+    if( X != A )
+        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, A ) );
+
+    /*
+     * X should always be positive as a result of unsigned additions.
+     */
+    X->s = 1;
+
+    for( j = B->n; j > 0; j-- )
+        if( B->p[j - 1] != 0 )
+            break;
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, j ) );
+
+    o = B->p; p = X->p; c = 0;
+
+    /*
+     * tmp is used because it might happen that p == o
+     */
+    for( i = 0; i < j; i++, o++, p++ )
+    {
+        tmp= *o;
+        *p +=  c; c  = ( *p <  c );
+        *p += tmp; c += ( *p < tmp );
+    }
+
+    while( c != 0 )
+    {
+        if( i >= X->n )
+        {
+            MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, i + 1 ) );
+            p = X->p + i;
+        }
+
+        *p += c; c = ( *p < c ); i++; p++;
+    }
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Helper for mbedtls_mpi subtraction
+ */
+static void mpi_sub_hlp( size_t n, mbedtls_mpi_uint *s, mbedtls_mpi_uint *d )
+{
+    size_t i;
+    mbedtls_mpi_uint c, z;
+
+    for( i = c = 0; i < n; i++, s++, d++ )
+    {
+        z = ( *d <  c );     *d -=  c;
+        c = ( *d < *s ) + z; *d -= *s;
+    }
+
+    while( c != 0 )
+    {
+        z = ( *d < c ); *d -= c;
+        c = z; i++; d++;
+    }
+}
+
+/*
+ * Unsigned subtraction: X = |A| - |B|  (HAC 14.9)
+ */
+int mbedtls_mpi_sub_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B )
+{
+    mbedtls_mpi TB;
+    int ret;
+    size_t n;
+
+    if( mbedtls_mpi_cmp_abs( A, B ) < 0 )
+        return( MBEDTLS_ERR_MPI_NEGATIVE_VALUE );
+
+    mbedtls_mpi_init( &TB );
+
+    if( X == B )
+    {
+        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, B ) );
+        B = &TB;
+    }
+
+    if( X != A )
+        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, A ) );
+
+    /*
+     * X should always be positive as a result of unsigned subtractions.
+     */
+    X->s = 1;
+
+    ret = 0;
+
+    for( n = B->n; n > 0; n-- )
+        if( B->p[n - 1] != 0 )
+            break;
+
+    mpi_sub_hlp( n, B->p, X->p );
+
+cleanup:
+
+    mbedtls_mpi_free( &TB );
+
+    return( ret );
+}
+
+/*
+ * Signed addition: X = A + B
+ */
+int mbedtls_mpi_add_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B )
+{
+    int ret, s = A->s;
+
+    if( A->s * B->s < 0 )
+    {
+        if( mbedtls_mpi_cmp_abs( A, B ) >= 0 )
+        {
+            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( X, A, B ) );
+            X->s =  s;
+        }
+        else
+        {
+            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( X, B, A ) );
+            X->s = -s;
+        }
+    }
+    else
+    {
+        MBEDTLS_MPI_CHK( mbedtls_mpi_add_abs( X, A, B ) );
+        X->s = s;
+    }
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Signed subtraction: X = A - B
+ */
+int mbedtls_mpi_sub_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B )
+{
+    int ret, s = A->s;
+
+    if( A->s * B->s > 0 )
+    {
+        if( mbedtls_mpi_cmp_abs( A, B ) >= 0 )
+        {
+            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( X, A, B ) );
+            X->s =  s;
+        }
+        else
+        {
+            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( X, B, A ) );
+            X->s = -s;
+        }
+    }
+    else
+    {
+        MBEDTLS_MPI_CHK( mbedtls_mpi_add_abs( X, A, B ) );
+        X->s = s;
+    }
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Signed addition: X = A + b
+ */
+int mbedtls_mpi_add_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_sint b )
+{
+    mbedtls_mpi _B;
+    mbedtls_mpi_uint p[1];
+
+    p[0] = ( b < 0 ) ? -b : b;
+    _B.s = ( b < 0 ) ? -1 : 1;
+    _B.n = 1;
+    _B.p = p;
+
+    return( mbedtls_mpi_add_mpi( X, A, &_B ) );
+}
+
+/*
+ * Signed subtraction: X = A - b
+ */
+int mbedtls_mpi_sub_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_sint b )
+{
+    mbedtls_mpi _B;
+    mbedtls_mpi_uint p[1];
+
+    p[0] = ( b < 0 ) ? -b : b;
+    _B.s = ( b < 0 ) ? -1 : 1;
+    _B.n = 1;
+    _B.p = p;
+
+    return( mbedtls_mpi_sub_mpi( X, A, &_B ) );
+}
+
+/*
+ * Helper for mbedtls_mpi multiplication
+ */
+static
+#if defined(__APPLE__) && defined(__arm__)
+/*
+ * Apple LLVM version 4.2 (clang-425.0.24) (based on LLVM 3.2svn)
+ * appears to need this to prevent bad ARM code generation at -O3.
+ */
+__attribute__ ((noinline))
+#endif
+void mpi_mul_hlp( size_t i, mbedtls_mpi_uint *s, mbedtls_mpi_uint *d, mbedtls_mpi_uint b )
+{
+    mbedtls_mpi_uint c = 0, t = 0;
+
+#if defined(MULADDC_HUIT)
+    for( ; i >= 8; i -= 8 )
+    {
+        MULADDC_INIT
+        MULADDC_HUIT
+        MULADDC_STOP
+    }
+
+    for( ; i > 0; i-- )
+    {
+        MULADDC_INIT
+        MULADDC_CORE
+        MULADDC_STOP
+    }
+#else /* MULADDC_HUIT */
+    for( ; i >= 16; i -= 16 )
+    {
+        MULADDC_INIT
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_CORE   MULADDC_CORE
+
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_STOP
+    }
+
+    for( ; i >= 8; i -= 8 )
+    {
+        MULADDC_INIT
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_CORE   MULADDC_CORE
+
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_CORE   MULADDC_CORE
+        MULADDC_STOP
+    }
+
+    for( ; i > 0; i-- )
+    {
+        MULADDC_INIT
+        MULADDC_CORE
+        MULADDC_STOP
+    }
+#endif /* MULADDC_HUIT */
+
+    t++;
+
+    do {
+        *d += c; c = ( *d < c ); d++;
+    }
+    while( c != 0 );
+}
+
+/*
+ * Baseline multiplication: X = A * B  (HAC 14.12)
+ */
+int mbedtls_mpi_mul_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B )
+{
+    int ret;
+    size_t i, j;
+    mbedtls_mpi TA, TB;
+
+    mbedtls_mpi_init( &TA ); mbedtls_mpi_init( &TB );
+
+    if( X == A ) { MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TA, A ) ); A = &TA; }
+    if( X == B ) { MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, B ) ); B = &TB; }
+
+    for( i = A->n; i > 0; i-- )
+        if( A->p[i - 1] != 0 )
+            break;
+
+    for( j = B->n; j > 0; j-- )
+        if( B->p[j - 1] != 0 )
+            break;
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, i + j ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) );
+
+    for( i++; j > 0; j-- )
+        mpi_mul_hlp( i - 1, A->p, X->p + j - 1, B->p[j - 1] );
+
+    X->s = A->s * B->s;
+
+cleanup:
+
+    mbedtls_mpi_free( &TB ); mbedtls_mpi_free( &TA );
+
+    return( ret );
+}
+
+/*
+ * Baseline multiplication: X = A * b
+ */
+int mbedtls_mpi_mul_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_uint b )
+{
+    mbedtls_mpi _B;
+    mbedtls_mpi_uint p[1];
+
+    _B.s = 1;
+    _B.n = 1;
+    _B.p = p;
+    p[0] = b;
+
+    return( mbedtls_mpi_mul_mpi( X, A, &_B ) );
+}
+
+/*
+ * Unsigned integer divide - double mbedtls_mpi_uint dividend, u1/u0, and
+ * mbedtls_mpi_uint divisor, d
+ */
+static mbedtls_mpi_uint mbedtls_int_div_int( mbedtls_mpi_uint u1,
+            mbedtls_mpi_uint u0, mbedtls_mpi_uint d, mbedtls_mpi_uint *r )
+{
+#if defined(MBEDTLS_HAVE_UDBL)
+    mbedtls_t_udbl dividend, quotient;
+#else
+    const mbedtls_mpi_uint radix = (mbedtls_mpi_uint) 1 << biH;
+    const mbedtls_mpi_uint uint_halfword_mask = ( (mbedtls_mpi_uint) 1 << biH ) - 1;
+    mbedtls_mpi_uint d0, d1, q0, q1, rAX, r0, quotient;
+    mbedtls_mpi_uint u0_msw, u0_lsw;
+    size_t s;
+#endif
+
+    /*
+     * Check for overflow
+     */
+    if( 0 == d || u1 >= d )
+    {
+        if (r != NULL) *r = ~0;
+
+        return ( ~0 );
+    }
+
+#if defined(MBEDTLS_HAVE_UDBL)
+    dividend  = (mbedtls_t_udbl) u1 << biL;
+    dividend |= (mbedtls_t_udbl) u0;
+    quotient = dividend / d;
+    if( quotient > ( (mbedtls_t_udbl) 1 << biL ) - 1 )
+        quotient = ( (mbedtls_t_udbl) 1 << biL ) - 1;
+
+    if( r != NULL )
+        *r = (mbedtls_mpi_uint)( dividend - (quotient * d ) );
+
+    return (mbedtls_mpi_uint) quotient;
+#else
+
+    /*
+     * Algorithm D, Section 4.3.1 - The Art of Computer Programming
+     *   Vol. 2 - Seminumerical Algorithms, Knuth
+     */
+
+    /*
+     * Normalize the divisor, d, and dividend, u0, u1
+     */
+    s = mbedtls_clz( d );
+    d = d << s;
+
+    u1 = u1 << s;
+    u1 |= ( u0 >> ( biL - s ) ) & ( -(mbedtls_mpi_sint)s >> ( biL - 1 ) );
+    u0 =  u0 << s;
+
+    d1 = d >> biH;
+    d0 = d & uint_halfword_mask;
+
+    u0_msw = u0 >> biH;
+    u0_lsw = u0 & uint_halfword_mask;
+
+    /*
+     * Find the first quotient and remainder
+     */
+    q1 = u1 / d1;
+    r0 = u1 - d1 * q1;
+
+    while( q1 >= radix || ( q1 * d0 > radix * r0 + u0_msw ) )
+    {
+        q1 -= 1;
+        r0 += d1;
+
+        if ( r0 >= radix ) break;
+    }
+
+    rAX = ( u1 * radix ) + ( u0_msw - q1 * d );
+    q0 = rAX / d1;
+    r0 = rAX - q0 * d1;
+
+    while( q0 >= radix || ( q0 * d0 > radix * r0 + u0_lsw ) )
+    {
+        q0 -= 1;
+        r0 += d1;
+
+        if ( r0 >= radix ) break;
+    }
+
+    if (r != NULL)
+        *r = ( rAX * radix + u0_lsw - q0 * d ) >> s;
+
+    quotient = q1 * radix + q0;
+
+    return quotient;
+#endif
+}
+
+/*
+ * Division by mbedtls_mpi: A = Q * B + R  (HAC 14.20)
+ */
+int mbedtls_mpi_div_mpi( mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A, const mbedtls_mpi *B )
+{
+    int ret;
+    size_t i, n, t, k;
+    mbedtls_mpi X, Y, Z, T1, T2;
+
+    if( mbedtls_mpi_cmp_int( B, 0 ) == 0 )
+        return( MBEDTLS_ERR_MPI_DIVISION_BY_ZERO );
+
+    mbedtls_mpi_init( &X ); mbedtls_mpi_init( &Y ); mbedtls_mpi_init( &Z );
+    mbedtls_mpi_init( &T1 ); mbedtls_mpi_init( &T2 );
+
+    if( mbedtls_mpi_cmp_abs( A, B ) < 0 )
+    {
+        if( Q != NULL ) MBEDTLS_MPI_CHK( mbedtls_mpi_lset( Q, 0 ) );
+        if( R != NULL ) MBEDTLS_MPI_CHK( mbedtls_mpi_copy( R, A ) );
+        return( 0 );
+    }
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &X, A ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &Y, B ) );
+    X.s = Y.s = 1;
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &Z, A->n + 2 ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &Z,  0 ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T1, 2 ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T2, 3 ) );
+
+    k = mbedtls_mpi_bitlen( &Y ) % biL;
+    if( k < biL - 1 )
+    {
+        k = biL - 1 - k;
+        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &X, k ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &Y, k ) );
+    }
+    else k = 0;
+
+    n = X.n - 1;
+    t = Y.n - 1;
+    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &Y, biL * ( n - t ) ) );
+
+    while( mbedtls_mpi_cmp_mpi( &X, &Y ) >= 0 )
+    {
+        Z.p[n - t]++;
+        MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &X, &X, &Y ) );
+    }
+    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &Y, biL * ( n - t ) ) );
+
+    for( i = n; i > t ; i-- )
+    {
+        if( X.p[i] >= Y.p[t] )
+            Z.p[i - t - 1] = ~0;
+        else
+        {
+            Z.p[i - t - 1] = mbedtls_int_div_int( X.p[i], X.p[i - 1],
+                                                            Y.p[t], NULL);
+        }
+
+        Z.p[i - t - 1]++;
+        do
+        {
+            Z.p[i - t - 1]--;
+
+            MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &T1, 0 ) );
+            T1.p[0] = ( t < 1 ) ? 0 : Y.p[t - 1];
+            T1.p[1] = Y.p[t];
+            MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( &T1, &T1, Z.p[i - t - 1] ) );
+
+            MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &T2, 0 ) );
+            T2.p[0] = ( i < 2 ) ? 0 : X.p[i - 2];
+            T2.p[1] = ( i < 1 ) ? 0 : X.p[i - 1];
+            T2.p[2] = X.p[i];
+        }
+        while( mbedtls_mpi_cmp_mpi( &T1, &T2 ) > 0 );
+
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( &T1, &Y, Z.p[i - t - 1] ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &T1,  biL * ( i - t - 1 ) ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &X, &X, &T1 ) );
+
+        if( mbedtls_mpi_cmp_int( &X, 0 ) < 0 )
+        {
+            MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &T1, &Y ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &T1, biL * ( i - t - 1 ) ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( &X, &X, &T1 ) );
+            Z.p[i - t - 1]--;
+        }
+    }
+
+    if( Q != NULL )
+    {
+        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( Q, &Z ) );
+        Q->s = A->s * B->s;
+    }
+
+    if( R != NULL )
+    {
+        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &X, k ) );
+        X.s = A->s;
+        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( R, &X ) );
+
+        if( mbedtls_mpi_cmp_int( R, 0 ) == 0 )
+            R->s = 1;
+    }
+
+cleanup:
+
+    mbedtls_mpi_free( &X ); mbedtls_mpi_free( &Y ); mbedtls_mpi_free( &Z );
+    mbedtls_mpi_free( &T1 ); mbedtls_mpi_free( &T2 );
+
+    return( ret );
+}
+
+/*
+ * Division by int: A = Q * b + R
+ */
+int mbedtls_mpi_div_int( mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A, mbedtls_mpi_sint b )
+{
+    mbedtls_mpi _B;
+    mbedtls_mpi_uint p[1];
+
+    p[0] = ( b < 0 ) ? -b : b;
+    _B.s = ( b < 0 ) ? -1 : 1;
+    _B.n = 1;
+    _B.p = p;
+
+    return( mbedtls_mpi_div_mpi( Q, R, A, &_B ) );
+}
+
+/*
+ * Modulo: R = A mod B
+ */
+int mbedtls_mpi_mod_mpi( mbedtls_mpi *R, const mbedtls_mpi *A, const mbedtls_mpi *B )
+{
+    int ret;
+
+    if( mbedtls_mpi_cmp_int( B, 0 ) < 0 )
+        return( MBEDTLS_ERR_MPI_NEGATIVE_VALUE );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_div_mpi( NULL, R, A, B ) );
+
+    while( mbedtls_mpi_cmp_int( R, 0 ) < 0 )
+      MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( R, R, B ) );
+
+    while( mbedtls_mpi_cmp_mpi( R, B ) >= 0 )
+      MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( R, R, B ) );
+
+cleanup:
+
+    return( ret );
+}
+
+/*
+ * Modulo: r = A mod b
+ */
+int mbedtls_mpi_mod_int( mbedtls_mpi_uint *r, const mbedtls_mpi *A, mbedtls_mpi_sint b )
+{
+    size_t i;
+    mbedtls_mpi_uint x, y, z;
+
+    if( b == 0 )
+        return( MBEDTLS_ERR_MPI_DIVISION_BY_ZERO );
+
+    if( b < 0 )
+        return( MBEDTLS_ERR_MPI_NEGATIVE_VALUE );
+
+    /*
+     * handle trivial cases
+     */
+    if( b == 1 )
+    {
+        *r = 0;
+        return( 0 );
+    }
+
+    if( b == 2 )
+    {
+        *r = A->p[0] & 1;
+        return( 0 );
+    }
+
+    /*
+     * general case
+     */
+    for( i = A->n, y = 0; i > 0; i-- )
+    {
+        x  = A->p[i - 1];
+        y  = ( y << biH ) | ( x >> biH );
+        z  = y / b;
+        y -= z * b;
+
+        x <<= biH;
+        y  = ( y << biH ) | ( x >> biH );
+        z  = y / b;
+        y -= z * b;
+    }
+
+    /*
+     * If A is negative, then the current y represents a negative value.
+     * Flipping it to the positive side.
+     */
+    if( A->s < 0 && y != 0 )
+        y = b - y;
+
+    *r = y;
+
+    return( 0 );
+}
+
+/*
+ * Fast Montgomery initialization (thanks to Tom St Denis)
+ */
+static void mpi_montg_init( mbedtls_mpi_uint *mm, const mbedtls_mpi *N )
+{
+    mbedtls_mpi_uint x, m0 = N->p[0];
+    unsigned int i;
+
+    x  = m0;
+    x += ( ( m0 + 2 ) & 4 ) << 1;
+
+    for( i = biL; i >= 8; i /= 2 )
+        x *= ( 2 - ( m0 * x ) );
+
+    *mm = ~x + 1;
+}
+
+/*
+ * Montgomery multiplication: A = A * B * R^-1 mod N  (HAC 14.36)
+ */
+static int mpi_montmul( mbedtls_mpi *A, const mbedtls_mpi *B, const mbedtls_mpi *N, mbedtls_mpi_uint mm,
+                         const mbedtls_mpi *T )
+{
+    size_t i, n, m;
+    mbedtls_mpi_uint u0, u1, *d;
+
+    if( T->n < N->n + 1 || T->p == NULL )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+
+    memset( T->p, 0, T->n * ciL );
+
+    d = T->p;
+    n = N->n;
+    m = ( B->n < n ) ? B->n : n;
+
+    for( i = 0; i < n; i++ )
+    {
+        /*
+         * T = (T + u0*B + u1*N) / 2^biL
+         */
+        u0 = A->p[i];
+        u1 = ( d[0] + u0 * B->p[0] ) * mm;
+
+        mpi_mul_hlp( m, B->p, d, u0 );
+        mpi_mul_hlp( n, N->p, d, u1 );
+
+        *d++ = u0; d[n + 1] = 0;
+    }
+
+    memcpy( A->p, d, ( n + 1 ) * ciL );
+
+    if( mbedtls_mpi_cmp_abs( A, N ) >= 0 )
+        mpi_sub_hlp( n, N->p, A->p );
+    else
+        /* prevent timing attacks */
+        mpi_sub_hlp( n, A->p, T->p );
+
+    return( 0 );
+}
+
+/*
+ * Montgomery reduction: A = A * R^-1 mod N
+ */
+static int mpi_montred( mbedtls_mpi *A, const mbedtls_mpi *N, mbedtls_mpi_uint mm, const mbedtls_mpi *T )
+{
+    mbedtls_mpi_uint z = 1;
+    mbedtls_mpi U;
+
+    U.n = U.s = (int) z;
+    U.p = &z;
+
+    return( mpi_montmul( A, &U, N, mm, T ) );
+}
+
+/*
+ * Sliding-window exponentiation: X = A^E mod N  (HAC 14.85)
+ */
+int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *E, const mbedtls_mpi *N, mbedtls_mpi *_RR )
+{
+    int ret;
+    size_t wbits, wsize, one = 1;
+    size_t i, j, nblimbs;
+    size_t bufsize, nbits;
+    mbedtls_mpi_uint ei, mm, state;
+    mbedtls_mpi RR, T, W[ 2 << MBEDTLS_MPI_WINDOW_SIZE ], Apos;
+    int neg;
+
+    if( mbedtls_mpi_cmp_int( N, 0 ) < 0 || ( N->p[0] & 1 ) == 0 )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+
+    if( mbedtls_mpi_cmp_int( E, 0 ) < 0 )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+
+    /*
+     * Init temps and window size
+     */
+    mpi_montg_init( &mm, N );
+    mbedtls_mpi_init( &RR ); mbedtls_mpi_init( &T );
+    mbedtls_mpi_init( &Apos );
+    memset( W, 0, sizeof( W ) );
+
+    i = mbedtls_mpi_bitlen( E );
+
+    wsize = ( i > 671 ) ? 6 : ( i > 239 ) ? 5 :
+            ( i >  79 ) ? 4 : ( i >  23 ) ? 3 : 1;
+
+    if( wsize > MBEDTLS_MPI_WINDOW_SIZE )
+        wsize = MBEDTLS_MPI_WINDOW_SIZE;
+
+    j = N->n + 1;
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, j ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[1],  j ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T, j * 2 ) );
+
+    /*
+     * Compensate for negative A (and correct at the end)
+     */
+    neg = ( A->s == -1 );
+    if( neg )
+    {
+        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &Apos, A ) );
+        Apos.s = 1;
+        A = &Apos;
+    }
+
+    /*
+     * If 1st call, pre-compute R^2 mod N
+     */
+    if( _RR == NULL || _RR->p == NULL )
+    {
+        MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &RR, 1 ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &RR, N->n * 2 * biL ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &RR, &RR, N ) );
+
+        if( _RR != NULL )
+            memcpy( _RR, &RR, sizeof( mbedtls_mpi ) );
+    }
+    else
+        memcpy( &RR, _RR, sizeof( mbedtls_mpi ) );
+
+    /*
+     * W[1] = A * R^2 * R^-1 mod N = A * R mod N
+     */
+    if( mbedtls_mpi_cmp_mpi( A, N ) >= 0 )
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &W[1], A, N ) );
+    else
+        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[1], A ) );
+
+    MBEDTLS_MPI_CHK( mpi_montmul( &W[1], &RR, N, mm, &T ) );
+
+    /*
+     * X = R^2 * R^-1 mod N = R mod N
+     */
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, &RR ) );
+    MBEDTLS_MPI_CHK( mpi_montred( X, N, mm, &T ) );
+
+    if( wsize > 1 )
+    {
+        /*
+         * W[1 << (wsize - 1)] = W[1] ^ (wsize - 1)
+         */
+        j =  one << ( wsize - 1 );
+
+        MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[j], N->n + 1 ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[j], &W[1]    ) );
+
+        for( i = 0; i < wsize - 1; i++ )
+            MBEDTLS_MPI_CHK( mpi_montmul( &W[j], &W[j], N, mm, &T ) );
+
+        /*
+         * W[i] = W[i - 1] * W[1]
+         */
+        for( i = j + 1; i < ( one << wsize ); i++ )
+        {
+            MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[i], N->n + 1 ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[i], &W[i - 1] ) );
+
+            MBEDTLS_MPI_CHK( mpi_montmul( &W[i], &W[1], N, mm, &T ) );
+        }
+    }
+
+    nblimbs = E->n;
+    bufsize = 0;
+    nbits   = 0;
+    wbits   = 0;
+    state   = 0;
+
+    while( 1 )
+    {
+        if( bufsize == 0 )
+        {
+            if( nblimbs == 0 )
+                break;
+
+            nblimbs--;
+
+            bufsize = sizeof( mbedtls_mpi_uint ) << 3;
+        }
+
+        bufsize--;
+
+        ei = (E->p[nblimbs] >> bufsize) & 1;
+
+        /*
+         * skip leading 0s
+         */
+        if( ei == 0 && state == 0 )
+            continue;
+
+        if( ei == 0 && state == 1 )
+        {
+            /*
+             * out of window, square X
+             */
+            MBEDTLS_MPI_CHK( mpi_montmul( X, X, N, mm, &T ) );
+            continue;
+        }
+
+        /*
+         * add ei to current window
+         */
+        state = 2;
+
+        nbits++;
+        wbits |= ( ei << ( wsize - nbits ) );
+
+        if( nbits == wsize )
+        {
+            /*
+             * X = X^wsize R^-1 mod N
+             */
+            for( i = 0; i < wsize; i++ )
+                MBEDTLS_MPI_CHK( mpi_montmul( X, X, N, mm, &T ) );
+
+            /*
+             * X = X * W[wbits] R^-1 mod N
+             */
+            MBEDTLS_MPI_CHK( mpi_montmul( X, &W[wbits], N, mm, &T ) );
+
+            state--;
+            nbits = 0;
+            wbits = 0;
+        }
+    }
+
+    /*
+     * process the remaining bits
+     */
+    for( i = 0; i < nbits; i++ )
+    {
+        MBEDTLS_MPI_CHK( mpi_montmul( X, X, N, mm, &T ) );
+
+        wbits <<= 1;
+
+        if( ( wbits & ( one << wsize ) ) != 0 )
+            MBEDTLS_MPI_CHK( mpi_montmul( X, &W[1], N, mm, &T ) );
+    }
+
+    /*
+     * X = A^E * R * R^-1 mod N = A^E mod N
+     */
+    MBEDTLS_MPI_CHK( mpi_montred( X, N, mm, &T ) );
+
+    if( neg )
+    {
+        X->s = -1;
+        MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( X, N, X ) );
+    }
+
+cleanup:
+
+    for( i = ( one << ( wsize - 1 ) ); i < ( one << wsize ); i++ )
+        mbedtls_mpi_free( &W[i] );
+
+    mbedtls_mpi_free( &W[1] ); mbedtls_mpi_free( &T ); mbedtls_mpi_free( &Apos );
+
+    if( _RR == NULL || _RR->p == NULL )
+        mbedtls_mpi_free( &RR );
+
+    return( ret );
+}
+
+/*
+ * Greatest common divisor: G = gcd(A, B)  (HAC 14.54)
+ */
+int mbedtls_mpi_gcd( mbedtls_mpi *G, const mbedtls_mpi *A, const mbedtls_mpi *B )
+{
+    int ret;
+    size_t lz, lzt;
+    mbedtls_mpi TG, TA, TB;
+
+    mbedtls_mpi_init( &TG ); mbedtls_mpi_init( &TA ); mbedtls_mpi_init( &TB );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TA, A ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, B ) );
+
+    lz = mbedtls_mpi_lsb( &TA );
+    lzt = mbedtls_mpi_lsb( &TB );
+
+    if( lzt < lz )
+        lz = lzt;
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TA, lz ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TB, lz ) );
+
+    TA.s = TB.s = 1;
+
+    while( mbedtls_mpi_cmp_int( &TA, 0 ) != 0 )
+    {
+        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TA, mbedtls_mpi_lsb( &TA ) ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TB, mbedtls_mpi_lsb( &TB ) ) );
+
+        if( mbedtls_mpi_cmp_mpi( &TA, &TB ) >= 0 )
+        {
+            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( &TA, &TA, &TB ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TA, 1 ) );
+        }
+        else
+        {
+            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( &TB, &TB, &TA ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TB, 1 ) );
+        }
+    }
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &TB, lz ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( G, &TB ) );
+
+cleanup:
+
+    mbedtls_mpi_free( &TG ); mbedtls_mpi_free( &TA ); mbedtls_mpi_free( &TB );
+
+    return( ret );
+}
+
+/*
+ * Fill X with size bytes of random.
+ *
+ * Use a temporary bytes representation to make sure the result is the same
+ * regardless of the platform endianness (useful when f_rng is actually
+ * deterministic, eg for tests).
+ */
+int mbedtls_mpi_fill_random( mbedtls_mpi *X, size_t size,
+                     int (*f_rng)(void *, unsigned char *, size_t),
+                     void *p_rng )
+{
+    int ret;
+    unsigned char buf[MBEDTLS_MPI_MAX_SIZE];
+
+    if( size > MBEDTLS_MPI_MAX_SIZE )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+
+    MBEDTLS_MPI_CHK( f_rng( p_rng, buf, size ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_binary( X, buf, size ) );
+
+cleanup:
+    return( ret );
+}
+
+/*
+ * Modular inverse: X = A^-1 mod N  (HAC 14.61 / 14.64)
+ */
+int mbedtls_mpi_inv_mod( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *N )
+{
+    int ret;
+    mbedtls_mpi G, TA, TU, U1, U2, TB, TV, V1, V2;
+
+    if( mbedtls_mpi_cmp_int( N, 0 ) <= 0 )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+
+    mbedtls_mpi_init( &TA ); mbedtls_mpi_init( &TU ); mbedtls_mpi_init( &U1 ); mbedtls_mpi_init( &U2 );
+    mbedtls_mpi_init( &G ); mbedtls_mpi_init( &TB ); mbedtls_mpi_init( &TV );
+    mbedtls_mpi_init( &V1 ); mbedtls_mpi_init( &V2 );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_gcd( &G, A, N ) );
+
+    if( mbedtls_mpi_cmp_int( &G, 1 ) != 0 )
+    {
+        ret = MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
+        goto cleanup;
+    }
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &TA, A, N ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TU, &TA ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, N ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TV, N ) );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &U1, 1 ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &U2, 0 ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &V1, 0 ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &V2, 1 ) );
+
+    do
+    {
+        while( ( TU.p[0] & 1 ) == 0 )
+        {
+            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TU, 1 ) );
+
+            if( ( U1.p[0] & 1 ) != 0 || ( U2.p[0] & 1 ) != 0 )
+            {
+                MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( &U1, &U1, &TB ) );
+                MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &U2, &U2, &TA ) );
+            }
+
+            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &U1, 1 ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &U2, 1 ) );
+        }
+
+        while( ( TV.p[0] & 1 ) == 0 )
+        {
+            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TV, 1 ) );
+
+            if( ( V1.p[0] & 1 ) != 0 || ( V2.p[0] & 1 ) != 0 )
+            {
+                MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( &V1, &V1, &TB ) );
+                MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &V2, &V2, &TA ) );
+            }
+
+            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &V1, 1 ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &V2, 1 ) );
+        }
+
+        if( mbedtls_mpi_cmp_mpi( &TU, &TV ) >= 0 )
+        {
+            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &TU, &TU, &TV ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &U1, &U1, &V1 ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &U2, &U2, &V2 ) );
+        }
+        else
+        {
+            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &TV, &TV, &TU ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &V1, &V1, &U1 ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &V2, &V2, &U2 ) );
+        }
+    }
+    while( mbedtls_mpi_cmp_int( &TU, 0 ) != 0 );
+
+    while( mbedtls_mpi_cmp_int( &V1, 0 ) < 0 )
+        MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( &V1, &V1, N ) );
+
+    while( mbedtls_mpi_cmp_mpi( &V1, N ) >= 0 )
+        MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &V1, &V1, N ) );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, &V1 ) );
+
+cleanup:
+
+    mbedtls_mpi_free( &TA ); mbedtls_mpi_free( &TU ); mbedtls_mpi_free( &U1 ); mbedtls_mpi_free( &U2 );
+    mbedtls_mpi_free( &G ); mbedtls_mpi_free( &TB ); mbedtls_mpi_free( &TV );
+    mbedtls_mpi_free( &V1 ); mbedtls_mpi_free( &V2 );
+
+    return( ret );
+}
+
+#if defined(MBEDTLS_GENPRIME)
+
+static const int small_prime[] =
+{
+        3,    5,    7,   11,   13,   17,   19,   23,
+       29,   31,   37,   41,   43,   47,   53,   59,
+       61,   67,   71,   73,   79,   83,   89,   97,
+      101,  103,  107,  109,  113,  127,  131,  137,
+      139,  149,  151,  157,  163,  167,  173,  179,
+      181,  191,  193,  197,  199,  211,  223,  227,
+      229,  233,  239,  241,  251,  257,  263,  269,
+      271,  277,  281,  283,  293,  307,  311,  313,
+      317,  331,  337,  347,  349,  353,  359,  367,
+      373,  379,  383,  389,  397,  401,  409,  419,
+      421,  431,  433,  439,  443,  449,  457,  461,
+      463,  467,  479,  487,  491,  499,  503,  509,
+      521,  523,  541,  547,  557,  563,  569,  571,
+      577,  587,  593,  599,  601,  607,  613,  617,
+      619,  631,  641,  643,  647,  653,  659,  661,
+      673,  677,  683,  691,  701,  709,  719,  727,
+      733,  739,  743,  751,  757,  761,  769,  773,
+      787,  797,  809,  811,  821,  823,  827,  829,
+      839,  853,  857,  859,  863,  877,  881,  883,
+      887,  907,  911,  919,  929,  937,  941,  947,
+      953,  967,  971,  977,  983,  991,  997, -103
+};
+
+/*
+ * Small divisors test (X must be positive)
+ *
+ * Return values:
+ * 0: no small factor (possible prime, more tests needed)
+ * 1: certain prime
+ * MBEDTLS_ERR_MPI_NOT_ACCEPTABLE: certain non-prime
+ * other negative: error
+ */
+static int mpi_check_small_factors( const mbedtls_mpi *X )
+{
+    int ret = 0;
+    size_t i;
+    mbedtls_mpi_uint r;
+
+    if( ( X->p[0] & 1 ) == 0 )
+        return( MBEDTLS_ERR_MPI_NOT_ACCEPTABLE );
+
+    for( i = 0; small_prime[i] > 0; i++ )
+    {
+        if( mbedtls_mpi_cmp_int( X, small_prime[i] ) <= 0 )
+            return( 1 );
+
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mod_int( &r, X, small_prime[i] ) );
+
+        if( r == 0 )
+            return( MBEDTLS_ERR_MPI_NOT_ACCEPTABLE );
+    }
+
+cleanup:
+    return( ret );
+}
+
+/*
+ * Miller-Rabin pseudo-primality test  (HAC 4.24)
+ */
+static int mpi_miller_rabin( const mbedtls_mpi *X,
+                             int (*f_rng)(void *, unsigned char *, size_t),
+                             void *p_rng )
+{
+    int ret, count;
+    size_t i, j, k, n, s;
+    mbedtls_mpi W, R, T, A, RR;
+
+    mbedtls_mpi_init( &W ); mbedtls_mpi_init( &R ); mbedtls_mpi_init( &T ); mbedtls_mpi_init( &A );
+    mbedtls_mpi_init( &RR );
+
+    /*
+     * W = |X| - 1
+     * R = W >> lsb( W )
+     */
+    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_int( &W, X, 1 ) );
+    s = mbedtls_mpi_lsb( &W );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &R, &W ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &R, s ) );
+
+    i = mbedtls_mpi_bitlen( X );
+    /*
+     * HAC, table 4.4
+     */
+    n = ( ( i >= 1300 ) ?  2 : ( i >=  850 ) ?  3 :
+          ( i >=  650 ) ?  4 : ( i >=  350 ) ?  8 :
+          ( i >=  250 ) ? 12 : ( i >=  150 ) ? 18 : 27 );
+
+    for( i = 0; i < n; i++ )
+    {
+        /*
+         * pick a random A, 1 < A < |X| - 1
+         */
+        MBEDTLS_MPI_CHK( mbedtls_mpi_fill_random( &A, X->n * ciL, f_rng, p_rng ) );
+
+        if( mbedtls_mpi_cmp_mpi( &A, &W ) >= 0 )
+        {
+            j = mbedtls_mpi_bitlen( &A ) - mbedtls_mpi_bitlen( &W );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &A, j + 1 ) );
+        }
+        A.p[0] |= 3;
+
+        count = 0;
+        do {
+            MBEDTLS_MPI_CHK( mbedtls_mpi_fill_random( &A, X->n * ciL, f_rng, p_rng ) );
+
+            j = mbedtls_mpi_bitlen( &A );
+            k = mbedtls_mpi_bitlen( &W );
+            if (j > k) {
+                MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &A, j - k ) );
+            }
+
+            if (count++ > 30) {
+                return MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
+            }
+
+        } while ( mbedtls_mpi_cmp_mpi( &A, &W ) >= 0 ||
+                  mbedtls_mpi_cmp_int( &A, 1 )  <= 0    );
+
+        /*
+         * A = A^R mod |X|
+         */
+        MBEDTLS_MPI_CHK( mbedtls_mpi_exp_mod( &A, &A, &R, X, &RR ) );
+
+        if( mbedtls_mpi_cmp_mpi( &A, &W ) == 0 ||
+            mbedtls_mpi_cmp_int( &A,  1 ) == 0 )
+            continue;
+
+        j = 1;
+        while( j < s && mbedtls_mpi_cmp_mpi( &A, &W ) != 0 )
+        {
+            /*
+             * A = A * A mod |X|
+             */
+            MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi( &T, &A, &A ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &A, &T, X  ) );
+
+            if( mbedtls_mpi_cmp_int( &A, 1 ) == 0 )
+                break;
+
+            j++;
+        }
+
+        /*
+         * not prime if A != |X| - 1 or A == 1
+         */
+        if( mbedtls_mpi_cmp_mpi( &A, &W ) != 0 ||
+            mbedtls_mpi_cmp_int( &A,  1 ) == 0 )
+        {
+            ret = MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
+            break;
+        }
+    }
+
+cleanup:
+    mbedtls_mpi_free( &W ); mbedtls_mpi_free( &R ); mbedtls_mpi_free( &T ); mbedtls_mpi_free( &A );
+    mbedtls_mpi_free( &RR );
+
+    return( ret );
+}
+
+/*
+ * Pseudo-primality test: small factors, then Miller-Rabin
+ */
+int mbedtls_mpi_is_prime( const mbedtls_mpi *X,
+                  int (*f_rng)(void *, unsigned char *, size_t),
+                  void *p_rng )
+{
+    int ret;
+    mbedtls_mpi XX;
+
+    XX.s = 1;
+    XX.n = X->n;
+    XX.p = X->p;
+
+    if( mbedtls_mpi_cmp_int( &XX, 0 ) == 0 ||
+        mbedtls_mpi_cmp_int( &XX, 1 ) == 0 )
+        return( MBEDTLS_ERR_MPI_NOT_ACCEPTABLE );
+
+    if( mbedtls_mpi_cmp_int( &XX, 2 ) == 0 )
+        return( 0 );
+
+    if( ( ret = mpi_check_small_factors( &XX ) ) != 0 )
+    {
+        if( ret == 1 )
+            return( 0 );
+
+        return( ret );
+    }
+
+    return( mpi_miller_rabin( &XX, f_rng, p_rng ) );
+}
+
+/*
+ * Prime number generation
+ */
+int mbedtls_mpi_gen_prime( mbedtls_mpi *X, size_t nbits, int dh_flag,
+                   int (*f_rng)(void *, unsigned char *, size_t),
+                   void *p_rng )
+{
+    int ret;
+    size_t k, n;
+    mbedtls_mpi_uint r;
+    mbedtls_mpi Y;
+
+    if( nbits < 3 || nbits > MBEDTLS_MPI_MAX_BITS )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+
+    mbedtls_mpi_init( &Y );
+
+    n = BITS_TO_LIMBS( nbits );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_fill_random( X, n * ciL, f_rng, p_rng ) );
+
+    k = mbedtls_mpi_bitlen( X );
+    if( k > nbits ) MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( X, k - nbits + 1 ) );
+
+    mbedtls_mpi_set_bit( X, nbits-1, 1 );
+
+    X->p[0] |= 1;
+
+    if( dh_flag == 0 )
+    {
+        while( ( ret = mbedtls_mpi_is_prime( X, f_rng, p_rng ) ) != 0 )
+        {
+            if( ret != MBEDTLS_ERR_MPI_NOT_ACCEPTABLE )
+                goto cleanup;
+
+            MBEDTLS_MPI_CHK( mbedtls_mpi_add_int( X, X, 2 ) );
+        }
+    }
+    else
+    {
+        /*
+         * An necessary condition for Y and X = 2Y + 1 to be prime
+         * is X = 2 mod 3 (which is equivalent to Y = 2 mod 3).
+         * Make sure it is satisfied, while keeping X = 3 mod 4
+         */
+
+        X->p[0] |= 2;
+
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mod_int( &r, X, 3 ) );
+        if( r == 0 )
+            MBEDTLS_MPI_CHK( mbedtls_mpi_add_int( X, X, 8 ) );
+        else if( r == 1 )
+            MBEDTLS_MPI_CHK( mbedtls_mpi_add_int( X, X, 4 ) );
+
+        /* Set Y = (X-1) / 2, which is X / 2 because X is odd */
+        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &Y, X ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &Y, 1 ) );
+
+        while( 1 )
+        {
+            /*
+             * First, check small factors for X and Y
+             * before doing Miller-Rabin on any of them
+             */
+            if( ( ret = mpi_check_small_factors(  X         ) ) == 0 &&
+                ( ret = mpi_check_small_factors( &Y         ) ) == 0 &&
+                ( ret = mpi_miller_rabin(  X, f_rng, p_rng  ) ) == 0 &&
+                ( ret = mpi_miller_rabin( &Y, f_rng, p_rng  ) ) == 0 )
+            {
+                break;
+            }
+
+            if( ret != MBEDTLS_ERR_MPI_NOT_ACCEPTABLE )
+                goto cleanup;
+
+            /*
+             * Next candidates. We want to preserve Y = (X-1) / 2 and
+             * Y = 1 mod 2 and Y = 2 mod 3 (eq X = 3 mod 4 and X = 2 mod 3)
+             * so up Y by 6 and X by 12.
+             */
+            MBEDTLS_MPI_CHK( mbedtls_mpi_add_int(  X,  X, 12 ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_add_int( &Y, &Y, 6  ) );
+        }
+    }
+
+cleanup:
+
+    mbedtls_mpi_free( &Y );
+
+    return( ret );
+}
+
+#endif /* MBEDTLS_GENPRIME */
+
+#if defined(MBEDTLS_SELF_TEST)
+
+#define GCD_PAIR_COUNT  3
+
+static const int gcd_pairs[GCD_PAIR_COUNT][3] =
+{
+    { 693, 609, 21 },
+    { 1764, 868, 28 },
+    { 768454923, 542167814, 1 }
+};
+
+/*
+ * Checkup routine
+ */
+int mbedtls_mpi_self_test( int verbose )
+{
+    int ret, i;
+    mbedtls_mpi A, E, N, X, Y, U, V;
+
+    mbedtls_mpi_init( &A ); mbedtls_mpi_init( &E ); mbedtls_mpi_init( &N ); mbedtls_mpi_init( &X );
+    mbedtls_mpi_init( &Y ); mbedtls_mpi_init( &U ); mbedtls_mpi_init( &V );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &A, 16,
+        "EFE021C2645FD1DC586E69184AF4A31E" \
+        "D5F53E93B5F123FA41680867BA110131" \
+        "944FE7952E2517337780CB0DB80E61AA" \
+        "E7C8DDC6C5C6AADEB34EB38A2F40D5E6" ) );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &E, 16,
+        "B2E7EFD37075B9F03FF989C7C5051C20" \
+        "34D2A323810251127E7BF8625A4F49A5" \
+        "F3E27F4DA8BD59C47D6DAABA4C8127BD" \
+        "5B5C25763222FEFCCFC38B832366C29E" ) );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &N, 16,
+        "0066A198186C18C10B2F5ED9B522752A" \
+        "9830B69916E535C8F047518A889A43A5" \
+        "94B6BED27A168D31D4A52F88925AA8F5" ) );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi( &X, &A, &N ) );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &U, 16,
+        "602AB7ECA597A3D6B56FF9829A5E8B85" \
+        "9E857EA95A03512E2BAE7391688D264A" \
+        "A5663B0341DB9CCFD2C4C5F421FEC814" \
+        "8001B72E848A38CAE1C65F78E56ABDEF" \
+        "E12D3C039B8A02D6BE593F0BBBDA56F1" \
+        "ECF677152EF804370C1A305CAF3B5BF1" \
+        "30879B56C61DE584A0F53A2447A51E" ) );
+
+    if( verbose != 0 )
+        mbedtls_printf( "  MPI test #1 (mul_mpi): " );
+
+    if( mbedtls_mpi_cmp_mpi( &X, &U ) != 0 )
+    {
+        if( verbose != 0 )
+            mbedtls_printf( "failed\n" );
+
+        ret = 1;
+        goto cleanup;
+    }
+
+    if( verbose != 0 )
+        mbedtls_printf( "passed\n" );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_div_mpi( &X, &Y, &A, &N ) );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &U, 16,
+        "256567336059E52CAE22925474705F39A94" ) );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &V, 16,
+        "6613F26162223DF488E9CD48CC132C7A" \
+        "0AC93C701B001B092E4E5B9F73BCD27B" \
+        "9EE50D0657C77F374E903CDFA4C642" ) );
+
+    if( verbose != 0 )
+        mbedtls_printf( "  MPI test #2 (div_mpi): " );
+
+    if( mbedtls_mpi_cmp_mpi( &X, &U ) != 0 ||
+        mbedtls_mpi_cmp_mpi( &Y, &V ) != 0 )
+    {
+        if( verbose != 0 )
+            mbedtls_printf( "failed\n" );
+
+        ret = 1;
+        goto cleanup;
+    }
+
+    if( verbose != 0 )
+        mbedtls_printf( "passed\n" );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_exp_mod( &X, &A, &E, &N, NULL ) );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &U, 16,
+        "36E139AEA55215609D2816998ED020BB" \
+        "BD96C37890F65171D948E9BC7CBAA4D9" \
+        "325D24D6A3C12710F10A09FA08AB87" ) );
+
+    if( verbose != 0 )
+        mbedtls_printf( "  MPI test #3 (exp_mod): " );
+
+    if( mbedtls_mpi_cmp_mpi( &X, &U ) != 0 )
+    {
+        if( verbose != 0 )
+            mbedtls_printf( "failed\n" );
+
+        ret = 1;
+        goto cleanup;
+    }
+
+    if( verbose != 0 )
+        mbedtls_printf( "passed\n" );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_inv_mod( &X, &A, &N ) );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &U, 16,
+        "003A0AAEDD7E784FC07D8F9EC6E3BFD5" \
+        "C3DBA76456363A10869622EAC2DD84EC" \
+        "C5B8A74DAC4D09E03B5E0BE779F2DF61" ) );
+
+    if( verbose != 0 )
+        mbedtls_printf( "  MPI test #4 (inv_mod): " );
+
+    if( mbedtls_mpi_cmp_mpi( &X, &U ) != 0 )
+    {
+        if( verbose != 0 )
+            mbedtls_printf( "failed\n" );
+
+        ret = 1;
+        goto cleanup;
+    }
+
+    if( verbose != 0 )
+        mbedtls_printf( "passed\n" );
+
+    if( verbose != 0 )
+        mbedtls_printf( "  MPI test #5 (simple gcd): " );
+
+    for( i = 0; i < GCD_PAIR_COUNT; i++ )
+    {
+        MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &X, gcd_pairs[i][0] ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &Y, gcd_pairs[i][1] ) );
+
+        MBEDTLS_MPI_CHK( mbedtls_mpi_gcd( &A, &X, &Y ) );
+
+        if( mbedtls_mpi_cmp_int( &A, gcd_pairs[i][2] ) != 0 )
+        {
+            if( verbose != 0 )
+                mbedtls_printf( "failed at %d\n", i );
+
+            ret = 1;
+            goto cleanup;
+        }
+    }
+
+    if( verbose != 0 )
+        mbedtls_printf( "passed\n" );
+
+cleanup:
+
+    if( ret != 0 && verbose != 0 )
+        mbedtls_printf( "Unexpected error, return code = %08X\n", ret );
+
+    mbedtls_mpi_free( &A ); mbedtls_mpi_free( &E ); mbedtls_mpi_free( &N ); mbedtls_mpi_free( &X );
+    mbedtls_mpi_free( &Y ); mbedtls_mpi_free( &U ); mbedtls_mpi_free( &V );
+
+    if( verbose != 0 )
+        mbedtls_printf( "\n" );
+
+    return( ret );
+}
+
+#endif /* MBEDTLS_SELF_TEST */
+
+#endif /* MBEDTLS_BIGNUM_C */

+ 625 - 0
Pal/lib/crypto/mbedtls/dhm.c

@@ -0,0 +1,625 @@
+/*
+ *  Diffie-Hellman-Merkle key exchange
+ *
+ *  Copyright (C) 2006-2015, ARM Limited, All Rights Reserved
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  This file is part of mbed TLS (https://tls.mbed.org)
+ */
+/*
+ *  The following sources were referenced in the design of this implementation
+ *  of the Diffie-Hellman-Merkle algorithm:
+ *
+ *  [1] Handbook of Applied Cryptography - 1997, Chapter 12
+ *      Menezes, van Oorschot and Vanstone
+ *
+ */
+
+#if !defined(MBEDTLS_CONFIG_FILE)
+#include "mbedtls/config.h"
+#else
+#include MBEDTLS_CONFIG_FILE
+#endif
+
+#if defined(MBEDTLS_DHM_C)
+
+#include "mbedtls/dhm.h"
+
+#if defined(MBEDTLS_PEM_PARSE_C)
+#include "mbedtls/pem.h"
+#endif
+
+#if defined(MBEDTLS_ASN1_PARSE_C)
+#include "mbedtls/asn1.h"
+#endif
+
+#if defined(MBEDTLS_PLATFORM_C)
+#include "mbedtls/platform.h"
+#else
+#include <stdlib.h>
+#include <stdio.h>
+#define mbedtls_printf     printf
+#define mbedtls_calloc    calloc
+#define mbedtls_free       free
+#endif
+
+/* Implementation that should never be optimized out by the compiler */
+static void mbedtls_zeroize( void *v, size_t n ) {
+    volatile unsigned char *p = v; while( n-- ) *p++ = 0;
+}
+
+/*
+ * helper to validate the mbedtls_mpi size and import it
+ */
+static int dhm_read_bignum( mbedtls_mpi *X,
+                            unsigned char **p,
+                            const unsigned char *end )
+{
+    int ret, n;
+
+    if( end - *p < 2 )
+        return( MBEDTLS_ERR_DHM_BAD_INPUT_DATA );
+
+    n = ( (*p)[0] << 8 ) | (*p)[1];
+    (*p) += 2;
+
+    if( (int)( end - *p ) < n )
+        return( MBEDTLS_ERR_DHM_BAD_INPUT_DATA );
+
+    if( ( ret = mbedtls_mpi_read_binary( X, *p, n ) ) != 0 )
+        return( MBEDTLS_ERR_DHM_READ_PARAMS_FAILED + ret );
+
+    (*p) += n;
+
+    return( 0 );
+}
+
+/*
+ * Verify sanity of parameter with regards to P
+ *
+ * Parameter should be: 2 <= public_param <= P - 2
+ *
+ * For more information on the attack, see:
+ *  http://www.cl.cam.ac.uk/~rja14/Papers/psandqs.pdf
+ *  http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2005-2643
+ */
+static int dhm_check_range( const mbedtls_mpi *param, const mbedtls_mpi *P )
+{
+    mbedtls_mpi L, U;
+    int ret = MBEDTLS_ERR_DHM_BAD_INPUT_DATA;
+
+    mbedtls_mpi_init( &L ); mbedtls_mpi_init( &U );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &L, 2 ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_int( &U, P, 2 ) );
+
+    if( mbedtls_mpi_cmp_mpi( param, &L ) >= 0 &&
+        mbedtls_mpi_cmp_mpi( param, &U ) <= 0 )
+    {
+        ret = 0;
+    }
+
+cleanup:
+    mbedtls_mpi_free( &L ); mbedtls_mpi_free( &U );
+    return( ret );
+}
+
+void mbedtls_dhm_init( mbedtls_dhm_context *ctx )
+{
+    memset( ctx, 0, sizeof( mbedtls_dhm_context ) );
+}
+
+/*
+ * Parse the ServerKeyExchange parameters
+ */
+int mbedtls_dhm_read_params( mbedtls_dhm_context *ctx,
+                     unsigned char **p,
+                     const unsigned char *end )
+{
+    int ret;
+
+    if( ( ret = dhm_read_bignum( &ctx->P,  p, end ) ) != 0 ||
+        ( ret = dhm_read_bignum( &ctx->G,  p, end ) ) != 0 ||
+        ( ret = dhm_read_bignum( &ctx->GY, p, end ) ) != 0 )
+        return( ret );
+
+    if( ( ret = dhm_check_range( &ctx->GY, &ctx->P ) ) != 0 )
+        return( ret );
+
+    ctx->len = mbedtls_mpi_size( &ctx->P );
+
+    return( 0 );
+}
+
+/*
+ * Setup and write the ServerKeyExchange parameters
+ */
+int mbedtls_dhm_make_params( mbedtls_dhm_context *ctx, int x_size,
+                     unsigned char *output, size_t *olen,
+                     int (*f_rng)(void *, unsigned char *, size_t),
+                     void *p_rng )
+{
+    int ret, count = 0;
+    size_t n1, n2, n3;
+    unsigned char *p;
+
+    if( mbedtls_mpi_cmp_int( &ctx->P, 0 ) == 0 )
+        return( MBEDTLS_ERR_DHM_BAD_INPUT_DATA );
+
+    /*
+     * Generate X as large as possible ( < P )
+     */
+    do
+    {
+        mbedtls_mpi_fill_random( &ctx->X, x_size, f_rng, p_rng );
+
+        while( mbedtls_mpi_cmp_mpi( &ctx->X, &ctx->P ) >= 0 )
+            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &ctx->X, 1 ) );
+
+        if( count++ > 10 )
+            return( MBEDTLS_ERR_DHM_MAKE_PARAMS_FAILED );
+    }
+    while( dhm_check_range( &ctx->X, &ctx->P ) != 0 );
+
+    /*
+     * Calculate GX = G^X mod P
+     */
+    MBEDTLS_MPI_CHK( mbedtls_mpi_exp_mod( &ctx->GX, &ctx->G, &ctx->X,
+                          &ctx->P , &ctx->RP ) );
+
+    if( ( ret = dhm_check_range( &ctx->GX, &ctx->P ) ) != 0 )
+        return( ret );
+
+    /*
+     * export P, G, GX
+     */
+#define DHM_MPI_EXPORT(X,n)                     \
+    MBEDTLS_MPI_CHK( mbedtls_mpi_write_binary( X, p + 2, n ) ); \
+    *p++ = (unsigned char)( n >> 8 );           \
+    *p++ = (unsigned char)( n      ); p += n;
+
+    n1 = mbedtls_mpi_size( &ctx->P  );
+    n2 = mbedtls_mpi_size( &ctx->G  );
+    n3 = mbedtls_mpi_size( &ctx->GX );
+
+    p = output;
+    DHM_MPI_EXPORT( &ctx->P , n1 );
+    DHM_MPI_EXPORT( &ctx->G , n2 );
+    DHM_MPI_EXPORT( &ctx->GX, n3 );
+
+    *olen  = p - output;
+
+    ctx->len = n1;
+
+cleanup:
+
+    if( ret != 0 )
+        return( MBEDTLS_ERR_DHM_MAKE_PARAMS_FAILED + ret );
+
+    return( 0 );
+}
+
+/*
+ * Import the peer's public value G^Y
+ */
+int mbedtls_dhm_read_public( mbedtls_dhm_context *ctx,
+                     const unsigned char *input, size_t ilen )
+{
+    int ret;
+
+    if( ctx == NULL || ilen < 1 || ilen > ctx->len )
+        return( MBEDTLS_ERR_DHM_BAD_INPUT_DATA );
+
+    if( ( ret = mbedtls_mpi_read_binary( &ctx->GY, input, ilen ) ) != 0 )
+        return( MBEDTLS_ERR_DHM_READ_PUBLIC_FAILED + ret );
+
+    return( 0 );
+}
+
+/*
+ * Create own private value X and export G^X
+ */
+int mbedtls_dhm_make_public( mbedtls_dhm_context *ctx, int x_size,
+                     unsigned char *output, size_t olen,
+                     int (*f_rng)(void *, unsigned char *, size_t),
+                     void *p_rng )
+{
+    int ret, count = 0;
+
+    if( ctx == NULL || olen < 1 || olen > ctx->len )
+        return( MBEDTLS_ERR_DHM_BAD_INPUT_DATA );
+
+    if( mbedtls_mpi_cmp_int( &ctx->P, 0 ) == 0 )
+        return( MBEDTLS_ERR_DHM_BAD_INPUT_DATA );
+
+    /*
+     * generate X and calculate GX = G^X mod P
+     */
+    do
+    {
+        mbedtls_mpi_fill_random( &ctx->X, x_size, f_rng, p_rng );
+
+        while( mbedtls_mpi_cmp_mpi( &ctx->X, &ctx->P ) >= 0 )
+            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &ctx->X, 1 ) );
+
+        if( count++ > 10 )
+            return( MBEDTLS_ERR_DHM_MAKE_PUBLIC_FAILED );
+    }
+    while( dhm_check_range( &ctx->X, &ctx->P ) != 0 );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_exp_mod( &ctx->GX, &ctx->G, &ctx->X,
+                          &ctx->P , &ctx->RP ) );
+
+    if( ( ret = dhm_check_range( &ctx->GX, &ctx->P ) ) != 0 )
+        return( ret );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_write_binary( &ctx->GX, output, olen ) );
+
+cleanup:
+
+    if( ret != 0 )
+        return( MBEDTLS_ERR_DHM_MAKE_PUBLIC_FAILED + ret );
+
+    return( 0 );
+}
+
+/*
+ * Use the blinding method and optimisation suggested in section 10 of:
+ *  KOCHER, Paul C. Timing attacks on implementations of Diffie-Hellman, RSA,
+ *  DSS, and other systems. In : Advances in Cryptology-CRYPTO'96. Springer
+ *  Berlin Heidelberg, 1996. p. 104-113.
+ */
+static int dhm_update_blinding( mbedtls_dhm_context *ctx,
+                    int (*f_rng)(void *, unsigned char *, size_t), void *p_rng )
+{
+    int ret, count;
+
+    /*
+     * Don't use any blinding the first time a particular X is used,
+     * but remember it to use blinding next time.
+     */
+    if( mbedtls_mpi_cmp_mpi( &ctx->X, &ctx->pX ) != 0 )
+    {
+        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &ctx->pX, &ctx->X ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &ctx->Vi, 1 ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &ctx->Vf, 1 ) );
+
+        return( 0 );
+    }
+
+    /*
+     * Ok, we need blinding. Can we re-use existing values?
+     * If yes, just update them by squaring them.
+     */
+    if( mbedtls_mpi_cmp_int( &ctx->Vi, 1 ) != 0 )
+    {
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi( &ctx->Vi, &ctx->Vi, &ctx->Vi ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &ctx->Vi, &ctx->Vi, &ctx->P ) );
+
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi( &ctx->Vf, &ctx->Vf, &ctx->Vf ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &ctx->Vf, &ctx->Vf, &ctx->P ) );
+
+        return( 0 );
+    }
+
+    /*
+     * We need to generate blinding values from scratch
+     */
+
+    /* Vi = random( 2, P-1 ) */
+    count = 0;
+    do
+    {
+        mbedtls_mpi_fill_random( &ctx->Vi, mbedtls_mpi_size( &ctx->P ), f_rng, p_rng );
+
+        while( mbedtls_mpi_cmp_mpi( &ctx->Vi, &ctx->P ) >= 0 )
+            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &ctx->Vi, 1 ) );
+
+        if( count++ > 10 )
+            return( MBEDTLS_ERR_MPI_NOT_ACCEPTABLE );
+    }
+    while( mbedtls_mpi_cmp_int( &ctx->Vi, 1 ) <= 0 );
+
+    /* Vf = Vi^-X mod P */
+    MBEDTLS_MPI_CHK( mbedtls_mpi_inv_mod( &ctx->Vf, &ctx->Vi, &ctx->P ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_exp_mod( &ctx->Vf, &ctx->Vf, &ctx->X, &ctx->P, &ctx->RP ) );
+
+cleanup:
+    return( ret );
+}
+
+/*
+ * Derive and export the shared secret (G^Y)^X mod P
+ */
+int mbedtls_dhm_calc_secret( mbedtls_dhm_context *ctx,
+                     unsigned char *output, size_t output_size, size_t *olen,
+                     int (*f_rng)(void *, unsigned char *, size_t),
+                     void *p_rng )
+{
+    int ret;
+    mbedtls_mpi GYb;
+
+    if( ctx == NULL || output_size < ctx->len )
+        return( MBEDTLS_ERR_DHM_BAD_INPUT_DATA );
+
+    if( ( ret = dhm_check_range( &ctx->GY, &ctx->P ) ) != 0 )
+        return( ret );
+
+    mbedtls_mpi_init( &GYb );
+
+    /* Blind peer's value */
+    if( f_rng != NULL )
+    {
+        MBEDTLS_MPI_CHK( dhm_update_blinding( ctx, f_rng, p_rng ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi( &GYb, &ctx->GY, &ctx->Vi ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &GYb, &GYb, &ctx->P ) );
+    }
+    else
+        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &GYb, &ctx->GY ) );
+
+    /* Do modular exponentiation */
+    MBEDTLS_MPI_CHK( mbedtls_mpi_exp_mod( &ctx->K, &GYb, &ctx->X,
+                          &ctx->P, &ctx->RP ) );
+
+    /* Unblind secret value */
+    if( f_rng != NULL )
+    {
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi( &ctx->K, &ctx->K, &ctx->Vf ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &ctx->K, &ctx->K, &ctx->P ) );
+    }
+
+    *olen = mbedtls_mpi_size( &ctx->K );
+
+    MBEDTLS_MPI_CHK( mbedtls_mpi_write_binary( &ctx->K, output, *olen ) );
+
+cleanup:
+    mbedtls_mpi_free( &GYb );
+
+    if( ret != 0 )
+        return( MBEDTLS_ERR_DHM_CALC_SECRET_FAILED + ret );
+
+    return( 0 );
+}
+
+/*
+ * Free the components of a DHM key
+ */
+void mbedtls_dhm_free( mbedtls_dhm_context *ctx )
+{
+    mbedtls_mpi_free( &ctx->pX); mbedtls_mpi_free( &ctx->Vf ); mbedtls_mpi_free( &ctx->Vi );
+    mbedtls_mpi_free( &ctx->RP ); mbedtls_mpi_free( &ctx->K ); mbedtls_mpi_free( &ctx->GY );
+    mbedtls_mpi_free( &ctx->GX ); mbedtls_mpi_free( &ctx->X ); mbedtls_mpi_free( &ctx->G );
+    mbedtls_mpi_free( &ctx->P );
+
+    mbedtls_zeroize( ctx, sizeof( mbedtls_dhm_context ) );
+}
+
+#if defined(MBEDTLS_ASN1_PARSE_C)
+/*
+ * Parse DHM parameters
+ */
+int mbedtls_dhm_parse_dhm( mbedtls_dhm_context *dhm, const unsigned char *dhmin,
+                   size_t dhminlen )
+{
+    int ret;
+    size_t len;
+    unsigned char *p, *end;
+#if defined(MBEDTLS_PEM_PARSE_C)
+    mbedtls_pem_context pem;
+
+    mbedtls_pem_init( &pem );
+
+    /* Avoid calling mbedtls_pem_read_buffer() on non-null-terminated string */
+    if( dhminlen == 0 || dhmin[dhminlen - 1] != '\0' )
+        ret = MBEDTLS_ERR_PEM_NO_HEADER_FOOTER_PRESENT;
+    else
+        ret = mbedtls_pem_read_buffer( &pem,
+                               "-----BEGIN DH PARAMETERS-----",
+                               "-----END DH PARAMETERS-----",
+                               dhmin, NULL, 0, &dhminlen );
+
+    if( ret == 0 )
+    {
+        /*
+         * Was PEM encoded
+         */
+        dhminlen = pem.buflen;
+    }
+    else if( ret != MBEDTLS_ERR_PEM_NO_HEADER_FOOTER_PRESENT )
+        goto exit;
+
+    p = ( ret == 0 ) ? pem.buf : (unsigned char *) dhmin;
+#else
+    p = (unsigned char *) dhmin;
+#endif /* MBEDTLS_PEM_PARSE_C */
+    end = p + dhminlen;
+
+    /*
+     *  DHParams ::= SEQUENCE {
+     *      prime              INTEGER,  -- P
+     *      generator          INTEGER,  -- g
+     *      privateValueLength INTEGER OPTIONAL
+     *  }
+     */
+    if( ( ret = mbedtls_asn1_get_tag( &p, end, &len,
+            MBEDTLS_ASN1_CONSTRUCTED | MBEDTLS_ASN1_SEQUENCE ) ) != 0 )
+    {
+        ret = MBEDTLS_ERR_DHM_INVALID_FORMAT + ret;
+        goto exit;
+    }
+
+    end = p + len;
+
+    if( ( ret = mbedtls_asn1_get_mpi( &p, end, &dhm->P  ) ) != 0 ||
+        ( ret = mbedtls_asn1_get_mpi( &p, end, &dhm->G ) ) != 0 )
+    {
+        ret = MBEDTLS_ERR_DHM_INVALID_FORMAT + ret;
+        goto exit;
+    }
+
+    if( p != end )
+    {
+        /* This might be the optional privateValueLength.
+         * If so, we can cleanly discard it */
+        mbedtls_mpi rec;
+        mbedtls_mpi_init( &rec );
+        ret = mbedtls_asn1_get_mpi( &p, end, &rec );
+        mbedtls_mpi_free( &rec );
+        if ( ret != 0 )
+        {
+            ret = MBEDTLS_ERR_DHM_INVALID_FORMAT + ret;
+            goto exit;
+        }
+        if ( p != end )
+        {
+            ret = MBEDTLS_ERR_DHM_INVALID_FORMAT +
+                MBEDTLS_ERR_ASN1_LENGTH_MISMATCH;
+            goto exit;
+        }
+    }
+
+    ret = 0;
+
+    dhm->len = mbedtls_mpi_size( &dhm->P );
+
+exit:
+#if defined(MBEDTLS_PEM_PARSE_C)
+    mbedtls_pem_free( &pem );
+#endif
+    if( ret != 0 )
+        mbedtls_dhm_free( dhm );
+
+    return( ret );
+}
+
+#if defined(MBEDTLS_FS_IO)
+/*
+ * Load all data from a file into a given buffer.
+ *
+ * The file is expected to contain either PEM or DER encoded data.
+ * A terminating null byte is always appended. It is included in the announced
+ * length only if the data looks like it is PEM encoded.
+ */
+static int load_file( const char *path, unsigned char **buf, size_t *n )
+{
+    FILE *f;
+    long size;
+
+    if( ( f = fopen( path, "rb" ) ) == NULL )
+        return( MBEDTLS_ERR_DHM_FILE_IO_ERROR );
+
+    fseek( f, 0, SEEK_END );
+    if( ( size = ftell( f ) ) == -1 )
+    {
+        fclose( f );
+        return( MBEDTLS_ERR_DHM_FILE_IO_ERROR );
+    }
+    fseek( f, 0, SEEK_SET );
+
+    *n = (size_t) size;
+
+    if( *n + 1 == 0 ||
+        ( *buf = mbedtls_calloc( 1, *n + 1 ) ) == NULL )
+    {
+        fclose( f );
+        return( MBEDTLS_ERR_DHM_ALLOC_FAILED );
+    }
+
+    if( fread( *buf, 1, *n, f ) != *n )
+    {
+        fclose( f );
+        mbedtls_free( *buf );
+        return( MBEDTLS_ERR_DHM_FILE_IO_ERROR );
+    }
+
+    fclose( f );
+
+    (*buf)[*n] = '\0';
+
+    if( strstr( (const char *) *buf, "-----BEGIN " ) != NULL )
+        ++*n;
+
+    return( 0 );
+}
+
+/*
+ * Load and parse DHM parameters
+ */
+int mbedtls_dhm_parse_dhmfile( mbedtls_dhm_context *dhm, const char *path )
+{
+    int ret;
+    size_t n;
+    unsigned char *buf;
+
+    if( ( ret = load_file( path, &buf, &n ) ) != 0 )
+        return( ret );
+
+    ret = mbedtls_dhm_parse_dhm( dhm, buf, n );
+
+    mbedtls_zeroize( buf, n );
+    mbedtls_free( buf );
+
+    return( ret );
+}
+#endif /* MBEDTLS_FS_IO */
+#endif /* MBEDTLS_ASN1_PARSE_C */
+
+#if defined(MBEDTLS_SELF_TEST)
+
+static const char mbedtls_test_dhm_params[] =
+"-----BEGIN DH PARAMETERS-----\r\n"
+"MIGHAoGBAJ419DBEOgmQTzo5qXl5fQcN9TN455wkOL7052HzxxRVMyhYmwQcgJvh\r\n"
+"1sa18fyfR9OiVEMYglOpkqVoGLN7qd5aQNNi5W7/C+VBdHTBJcGZJyyP5B3qcz32\r\n"
+"9mLJKudlVudV0Qxk5qUJaPZ/xupz0NyoVpviuiBOI1gNi8ovSXWzAgEC\r\n"
+"-----END DH PARAMETERS-----\r\n";
+
+static const size_t mbedtls_test_dhm_params_len = sizeof( mbedtls_test_dhm_params );
+
+/*
+ * Checkup routine
+ */
+int mbedtls_dhm_self_test( int verbose )
+{
+    int ret;
+    mbedtls_dhm_context dhm;
+
+    mbedtls_dhm_init( &dhm );
+
+    if( verbose != 0 )
+        mbedtls_printf( "  DHM parameter load: " );
+
+    if( ( ret = mbedtls_dhm_parse_dhm( &dhm,
+                    (const unsigned char *) mbedtls_test_dhm_params,
+                    mbedtls_test_dhm_params_len ) ) != 0 )
+    {
+        if( verbose != 0 )
+            mbedtls_printf( "failed\n" );
+
+        ret = 1;
+        goto exit;
+    }
+
+    if( verbose != 0 )
+        mbedtls_printf( "passed\n\n" );
+
+exit:
+    mbedtls_dhm_free( &dhm );
+
+    return( ret );
+}
+
+#endif /* MBEDTLS_SELF_TEST */
+
+#endif /* MBEDTLS_DHM_C */

+ 717 - 0
Pal/lib/crypto/mbedtls/mbedtls/bignum.h

@@ -0,0 +1,717 @@
+/**
+ * \file bignum.h
+ *
+ * \brief  Multi-precision integer library
+ *
+ *  Copyright (C) 2006-2015, ARM Limited, All Rights Reserved
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  This file is part of mbed TLS (https://tls.mbed.org)
+ */
+#ifndef MBEDTLS_BIGNUM_H
+#define MBEDTLS_BIGNUM_H
+
+#if !defined(MBEDTLS_CONFIG_FILE)
+#include "config.h"
+#else
+#include MBEDTLS_CONFIG_FILE
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(MBEDTLS_FS_IO)
+#include <stdio.h>
+#endif
+
+#define MBEDTLS_ERR_MPI_FILE_IO_ERROR                     -0x0002  /**< An error occurred while reading from or writing to a file. */
+#define MBEDTLS_ERR_MPI_BAD_INPUT_DATA                    -0x0004  /**< Bad input parameters to function. */
+#define MBEDTLS_ERR_MPI_INVALID_CHARACTER                 -0x0006  /**< There is an invalid character in the digit string. */
+#define MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL                  -0x0008  /**< The buffer is too small to write to. */
+#define MBEDTLS_ERR_MPI_NEGATIVE_VALUE                    -0x000A  /**< The input arguments are negative or result in illegal output. */
+#define MBEDTLS_ERR_MPI_DIVISION_BY_ZERO                  -0x000C  /**< The input argument for division is zero, which is not allowed. */
+#define MBEDTLS_ERR_MPI_NOT_ACCEPTABLE                    -0x000E  /**< The input arguments are not acceptable. */
+#define MBEDTLS_ERR_MPI_ALLOC_FAILED                      -0x0010  /**< Memory allocation failed. */
+
+#define MBEDTLS_MPI_CHK(f) do { if( ( ret = f ) != 0 ) goto cleanup; } while( 0 )
+
+/*
+ * Maximum size MPIs are allowed to grow to in number of limbs.
+ */
+#define MBEDTLS_MPI_MAX_LIMBS                             10000
+
+#if !defined(MBEDTLS_MPI_WINDOW_SIZE)
+/*
+ * Maximum window size used for modular exponentiation. Default: 6
+ * Minimum value: 1. Maximum value: 6.
+ *
+ * Result is an array of ( 2 << MBEDTLS_MPI_WINDOW_SIZE ) MPIs used
+ * for the sliding window calculation. (So 64 by default)
+ *
+ * Reduction in size, reduces speed.
+ */
+#define MBEDTLS_MPI_WINDOW_SIZE                           6        /**< Maximum windows size used. */
+#endif /* !MBEDTLS_MPI_WINDOW_SIZE */
+
+#if !defined(MBEDTLS_MPI_MAX_SIZE)
+/*
+ * Maximum size of MPIs allowed in bits and bytes for user-MPIs.
+ * ( Default: 512 bytes => 4096 bits, Maximum tested: 2048 bytes => 16384 bits )
+ *
+ * Note: Calculations can results temporarily in larger MPIs. So the number
+ * of limbs required (MBEDTLS_MPI_MAX_LIMBS) is higher.
+ */
+#define MBEDTLS_MPI_MAX_SIZE                              1024     /**< Maximum number of bytes for usable MPIs. */
+#endif /* !MBEDTLS_MPI_MAX_SIZE */
+
+#define MBEDTLS_MPI_MAX_BITS                              ( 8 * MBEDTLS_MPI_MAX_SIZE )    /**< Maximum number of bits for usable MPIs. */
+
+/*
+ * When reading from files with mbedtls_mpi_read_file() and writing to files with
+ * mbedtls_mpi_write_file() the buffer should have space
+ * for a (short) label, the MPI (in the provided radix), the newline
+ * characters and the '\0'.
+ *
+ * By default we assume at least a 10 char label, a minimum radix of 10
+ * (decimal) and a maximum of 4096 bit numbers (1234 decimal chars).
+ * Autosized at compile time for at least a 10 char label, a minimum radix
+ * of 10 (decimal) for a number of MBEDTLS_MPI_MAX_BITS size.
+ *
+ * This used to be statically sized to 1250 for a maximum of 4096 bit
+ * numbers (1234 decimal chars).
+ *
+ * Calculate using the formula:
+ *  MBEDTLS_MPI_RW_BUFFER_SIZE = ceil(MBEDTLS_MPI_MAX_BITS / ln(10) * ln(2)) +
+ *                                LabelSize + 6
+ */
+#define MBEDTLS_MPI_MAX_BITS_SCALE100          ( 100 * MBEDTLS_MPI_MAX_BITS )
+#define MBEDTLS_LN_2_DIV_LN_10_SCALE100                 332
+#define MBEDTLS_MPI_RW_BUFFER_SIZE             ( ((MBEDTLS_MPI_MAX_BITS_SCALE100 + MBEDTLS_LN_2_DIV_LN_10_SCALE100 - 1) / MBEDTLS_LN_2_DIV_LN_10_SCALE100) + 10 + 6 )
+
+/*
+ * Define the base integer type, architecture-wise.
+ *
+ * 32-bit integers can be forced on 64-bit arches (eg. for testing purposes)
+ * by defining MBEDTLS_HAVE_INT32 and undefining MBEDTLS_HAVE_ASM
+ */
+#if ( ! defined(MBEDTLS_HAVE_INT32) && \
+        defined(_MSC_VER) && defined(_M_AMD64) )
+  #define MBEDTLS_HAVE_INT64
+  typedef  int64_t mbedtls_mpi_sint;
+  typedef uint64_t mbedtls_mpi_uint;
+#else
+  #if ( ! defined(MBEDTLS_HAVE_INT32) &&               \
+        defined(__GNUC__) && (                          \
+        defined(__amd64__) || defined(__x86_64__)    || \
+        defined(__ppc64__) || defined(__powerpc64__) || \
+        defined(__ia64__)  || defined(__alpha__)     || \
+        (defined(__sparc__) && defined(__arch64__))  || \
+        defined(__s390x__) || defined(__mips64) ) )
+     #define MBEDTLS_HAVE_INT64
+     typedef  int64_t mbedtls_mpi_sint;
+     typedef uint64_t mbedtls_mpi_uint;
+     /* mbedtls_t_udbl defined as 128-bit unsigned int */
+     typedef unsigned int mbedtls_t_udbl __attribute__((mode(TI)));
+     #define MBEDTLS_HAVE_UDBL
+  #else
+     #define MBEDTLS_HAVE_INT32
+     typedef  int32_t mbedtls_mpi_sint;
+     typedef uint32_t mbedtls_mpi_uint;
+     typedef uint64_t mbedtls_t_udbl;
+     #define MBEDTLS_HAVE_UDBL
+  #endif /* !MBEDTLS_HAVE_INT32 && __GNUC__ && 64-bit platform */
+#endif /* !MBEDTLS_HAVE_INT32 && _MSC_VER && _M_AMD64 */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * \brief          MPI structure
+ */
+typedef struct
+{
+    int s;              /*!<  integer sign      */
+    size_t n;           /*!<  total # of limbs  */
+    mbedtls_mpi_uint *p;          /*!<  pointer to limbs  */
+}
+mbedtls_mpi;
+
+/**
+ * \brief           Initialize one MPI (make internal references valid)
+ *                  This just makes it ready to be set or freed,
+ *                  but does not define a value for the MPI.
+ *
+ * \param X         One MPI to initialize.
+ */
+void mbedtls_mpi_init( mbedtls_mpi *X );
+
+/**
+ * \brief          Unallocate one MPI
+ *
+ * \param X        One MPI to unallocate.
+ */
+void mbedtls_mpi_free( mbedtls_mpi *X );
+
+/**
+ * \brief          Enlarge to the specified number of limbs
+ *
+ * \param X        MPI to grow
+ * \param nblimbs  The target number of limbs
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
+ */
+int mbedtls_mpi_grow( mbedtls_mpi *X, size_t nblimbs );
+
+/**
+ * \brief          Resize down, keeping at least the specified number of limbs
+ *
+ * \param X        MPI to shrink
+ * \param nblimbs  The minimum number of limbs to keep
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
+ */
+int mbedtls_mpi_shrink( mbedtls_mpi *X, size_t nblimbs );
+
+/**
+ * \brief          Copy the contents of Y into X
+ *
+ * \param X        Destination MPI
+ * \param Y        Source MPI
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
+ */
+int mbedtls_mpi_copy( mbedtls_mpi *X, const mbedtls_mpi *Y );
+
+/**
+ * \brief          Swap the contents of X and Y
+ *
+ * \param X        First MPI value
+ * \param Y        Second MPI value
+ */
+void mbedtls_mpi_swap( mbedtls_mpi *X, mbedtls_mpi *Y );
+
+/**
+ * \brief          Safe conditional assignement X = Y if assign is 1
+ *
+ * \param X        MPI to conditionally assign to
+ * \param Y        Value to be assigned
+ * \param assign   1: perform the assignment, 0: keep X's original value
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
+ *
+ * \note           This function is equivalent to
+ *                      if( assign ) mbedtls_mpi_copy( X, Y );
+ *                 except that it avoids leaking any information about whether
+ *                 the assignment was done or not (the above code may leak
+ *                 information through branch prediction and/or memory access
+ *                 patterns analysis).
+ */
+int mbedtls_mpi_safe_cond_assign( mbedtls_mpi *X, const mbedtls_mpi *Y, unsigned char assign );
+
+/**
+ * \brief          Safe conditional swap X <-> Y if swap is 1
+ *
+ * \param X        First mbedtls_mpi value
+ * \param Y        Second mbedtls_mpi value
+ * \param assign   1: perform the swap, 0: keep X and Y's original values
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
+ *
+ * \note           This function is equivalent to
+ *                      if( assign ) mbedtls_mpi_swap( X, Y );
+ *                 except that it avoids leaking any information about whether
+ *                 the assignment was done or not (the above code may leak
+ *                 information through branch prediction and/or memory access
+ *                 patterns analysis).
+ */
+int mbedtls_mpi_safe_cond_swap( mbedtls_mpi *X, mbedtls_mpi *Y, unsigned char assign );
+
+/**
+ * \brief          Set value from integer
+ *
+ * \param X        MPI to set
+ * \param z        Value to use
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
+ */
+int mbedtls_mpi_lset( mbedtls_mpi *X, mbedtls_mpi_sint z );
+
+/**
+ * \brief          Get a specific bit from X
+ *
+ * \param X        MPI to use
+ * \param pos      Zero-based index of the bit in X
+ *
+ * \return         Either a 0 or a 1
+ */
+int mbedtls_mpi_get_bit( const mbedtls_mpi *X, size_t pos );
+
+/**
+ * \brief          Set a bit of X to a specific value of 0 or 1
+ *
+ * \note           Will grow X if necessary to set a bit to 1 in a not yet
+ *                 existing limb. Will not grow if bit should be set to 0
+ *
+ * \param X        MPI to use
+ * \param pos      Zero-based index of the bit in X
+ * \param val      The value to set the bit to (0 or 1)
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
+ *                 MBEDTLS_ERR_MPI_BAD_INPUT_DATA if val is not 0 or 1
+ */
+int mbedtls_mpi_set_bit( mbedtls_mpi *X, size_t pos, unsigned char val );
+
+/**
+ * \brief          Return the number of zero-bits before the least significant
+ *                 '1' bit
+ *
+ * Note: Thus also the zero-based index of the least significant '1' bit
+ *
+ * \param X        MPI to use
+ */
+size_t mbedtls_mpi_lsb( const mbedtls_mpi *X );
+
+/**
+ * \brief          Return the number of bits up to and including the most
+ *                 significant '1' bit'
+ *
+ * Note: Thus also the one-based index of the most significant '1' bit
+ *
+ * \param X        MPI to use
+ */
+size_t mbedtls_mpi_bitlen( const mbedtls_mpi *X );
+
+/**
+ * \brief          Return the total size in bytes
+ *
+ * \param X        MPI to use
+ */
+size_t mbedtls_mpi_size( const mbedtls_mpi *X );
+
+/**
+ * \brief          Import from an ASCII string
+ *
+ * \param X        Destination MPI
+ * \param radix    Input numeric base
+ * \param s        Null-terminated string buffer
+ *
+ * \return         0 if successful, or a MBEDTLS_ERR_MPI_XXX error code
+ */
+int mbedtls_mpi_read_string( mbedtls_mpi *X, int radix, const char *s );
+
+/**
+ * \brief          Export into an ASCII string
+ *
+ * \param X        Source MPI
+ * \param radix    Output numeric base
+ * \param buf      Buffer to write the string to
+ * \param buflen   Length of buf
+ * \param olen     Length of the string written, including final NUL byte
+ *
+ * \return         0 if successful, or a MBEDTLS_ERR_MPI_XXX error code.
+ *                 *olen is always updated to reflect the amount
+ *                 of data that has (or would have) been written.
+ *
+ * \note           Call this function with buflen = 0 to obtain the
+ *                 minimum required buffer size in *olen.
+ */
+int mbedtls_mpi_write_string( const mbedtls_mpi *X, int radix,
+                              char *buf, size_t buflen, size_t *olen );
+
+#if defined(MBEDTLS_FS_IO)
+/**
+ * \brief          Read X from an opened file
+ *
+ * \param X        Destination MPI
+ * \param radix    Input numeric base
+ * \param fin      Input file handle
+ *
+ * \return         0 if successful, MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL if
+ *                 the file read buffer is too small or a
+ *                 MBEDTLS_ERR_MPI_XXX error code
+ */
+int mbedtls_mpi_read_file( mbedtls_mpi *X, int radix, FILE *fin );
+
+/**
+ * \brief          Write X into an opened file, or stdout if fout is NULL
+ *
+ * \param p        Prefix, can be NULL
+ * \param X        Source MPI
+ * \param radix    Output numeric base
+ * \param fout     Output file handle (can be NULL)
+ *
+ * \return         0 if successful, or a MBEDTLS_ERR_MPI_XXX error code
+ *
+ * \note           Set fout == NULL to print X on the console.
+ */
+int mbedtls_mpi_write_file( const char *p, const mbedtls_mpi *X, int radix, FILE *fout );
+#endif /* MBEDTLS_FS_IO */
+
+/**
+ * \brief          Import X from unsigned binary data, big endian
+ *
+ * \param X        Destination MPI
+ * \param buf      Input buffer
+ * \param buflen   Input buffer size
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
+ */
+int mbedtls_mpi_read_binary( mbedtls_mpi *X, const unsigned char *buf, size_t buflen );
+
+/**
+ * \brief          Export X into unsigned binary data, big endian.
+ *                 Always fills the whole buffer, which will start with zeros
+ *                 if the number is smaller.
+ *
+ * \param X        Source MPI
+ * \param buf      Output buffer
+ * \param buflen   Output buffer size
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL if buf isn't large enough
+ */
+int mbedtls_mpi_write_binary( const mbedtls_mpi *X, unsigned char *buf, size_t buflen );
+
+/**
+ * \brief          Left-shift: X <<= count
+ *
+ * \param X        MPI to shift
+ * \param count    Amount to shift
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
+ */
+int mbedtls_mpi_shift_l( mbedtls_mpi *X, size_t count );
+
+/**
+ * \brief          Right-shift: X >>= count
+ *
+ * \param X        MPI to shift
+ * \param count    Amount to shift
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
+ */
+int mbedtls_mpi_shift_r( mbedtls_mpi *X, size_t count );
+
+/**
+ * \brief          Compare unsigned values
+ *
+ * \param X        Left-hand MPI
+ * \param Y        Right-hand MPI
+ *
+ * \return         1 if |X| is greater than |Y|,
+ *                -1 if |X| is lesser  than |Y| or
+ *                 0 if |X| is equal to |Y|
+ */
+int mbedtls_mpi_cmp_abs( const mbedtls_mpi *X, const mbedtls_mpi *Y );
+
+/**
+ * \brief          Compare signed values
+ *
+ * \param X        Left-hand MPI
+ * \param Y        Right-hand MPI
+ *
+ * \return         1 if X is greater than Y,
+ *                -1 if X is lesser  than Y or
+ *                 0 if X is equal to Y
+ */
+int mbedtls_mpi_cmp_mpi( const mbedtls_mpi *X, const mbedtls_mpi *Y );
+
+/**
+ * \brief          Compare signed values
+ *
+ * \param X        Left-hand MPI
+ * \param z        The integer value to compare to
+ *
+ * \return         1 if X is greater than z,
+ *                -1 if X is lesser  than z or
+ *                 0 if X is equal to z
+ */
+int mbedtls_mpi_cmp_int( const mbedtls_mpi *X, mbedtls_mpi_sint z );
+
+/**
+ * \brief          Unsigned addition: X = |A| + |B|
+ *
+ * \param X        Destination MPI
+ * \param A        Left-hand MPI
+ * \param B        Right-hand MPI
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
+ */
+int mbedtls_mpi_add_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B );
+
+/**
+ * \brief          Unsigned subtraction: X = |A| - |B|
+ *
+ * \param X        Destination MPI
+ * \param A        Left-hand MPI
+ * \param B        Right-hand MPI
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_NEGATIVE_VALUE if B is greater than A
+ */
+int mbedtls_mpi_sub_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B );
+
+/**
+ * \brief          Signed addition: X = A + B
+ *
+ * \param X        Destination MPI
+ * \param A        Left-hand MPI
+ * \param B        Right-hand MPI
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
+ */
+int mbedtls_mpi_add_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B );
+
+/**
+ * \brief          Signed subtraction: X = A - B
+ *
+ * \param X        Destination MPI
+ * \param A        Left-hand MPI
+ * \param B        Right-hand MPI
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
+ */
+int mbedtls_mpi_sub_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B );
+
+/**
+ * \brief          Signed addition: X = A + b
+ *
+ * \param X        Destination MPI
+ * \param A        Left-hand MPI
+ * \param b        The integer value to add
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
+ */
+int mbedtls_mpi_add_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_sint b );
+
+/**
+ * \brief          Signed subtraction: X = A - b
+ *
+ * \param X        Destination MPI
+ * \param A        Left-hand MPI
+ * \param b        The integer value to subtract
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
+ */
+int mbedtls_mpi_sub_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_sint b );
+
+/**
+ * \brief          Baseline multiplication: X = A * B
+ *
+ * \param X        Destination MPI
+ * \param A        Left-hand MPI
+ * \param B        Right-hand MPI
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
+ */
+int mbedtls_mpi_mul_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B );
+
+/**
+ * \brief          Baseline multiplication: X = A * b
+ *
+ * \param X        Destination MPI
+ * \param A        Left-hand MPI
+ * \param b        The unsigned integer value to multiply with
+ *
+ * \note           b is unsigned
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
+ */
+int mbedtls_mpi_mul_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_uint b );
+
+/**
+ * \brief          Division by mbedtls_mpi: A = Q * B + R
+ *
+ * \param Q        Destination MPI for the quotient
+ * \param R        Destination MPI for the rest value
+ * \param A        Left-hand MPI
+ * \param B        Right-hand MPI
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
+ *                 MBEDTLS_ERR_MPI_DIVISION_BY_ZERO if B == 0
+ *
+ * \note           Either Q or R can be NULL.
+ */
+int mbedtls_mpi_div_mpi( mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A, const mbedtls_mpi *B );
+
+/**
+ * \brief          Division by int: A = Q * b + R
+ *
+ * \param Q        Destination MPI for the quotient
+ * \param R        Destination MPI for the rest value
+ * \param A        Left-hand MPI
+ * \param b        Integer to divide by
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
+ *                 MBEDTLS_ERR_MPI_DIVISION_BY_ZERO if b == 0
+ *
+ * \note           Either Q or R can be NULL.
+ */
+int mbedtls_mpi_div_int( mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A, mbedtls_mpi_sint b );
+
+/**
+ * \brief          Modulo: R = A mod B
+ *
+ * \param R        Destination MPI for the rest value
+ * \param A        Left-hand MPI
+ * \param B        Right-hand MPI
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
+ *                 MBEDTLS_ERR_MPI_DIVISION_BY_ZERO if B == 0,
+ *                 MBEDTLS_ERR_MPI_NEGATIVE_VALUE if B < 0
+ */
+int mbedtls_mpi_mod_mpi( mbedtls_mpi *R, const mbedtls_mpi *A, const mbedtls_mpi *B );
+
+/**
+ * \brief          Modulo: r = A mod b
+ *
+ * \param r        Destination mbedtls_mpi_uint
+ * \param A        Left-hand MPI
+ * \param b        Integer to divide by
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
+ *                 MBEDTLS_ERR_MPI_DIVISION_BY_ZERO if b == 0,
+ *                 MBEDTLS_ERR_MPI_NEGATIVE_VALUE if b < 0
+ */
+int mbedtls_mpi_mod_int( mbedtls_mpi_uint *r, const mbedtls_mpi *A, mbedtls_mpi_sint b );
+
+/**
+ * \brief          Sliding-window exponentiation: X = A^E mod N
+ *
+ * \param X        Destination MPI
+ * \param A        Left-hand MPI
+ * \param E        Exponent MPI
+ * \param N        Modular MPI
+ * \param _RR      Speed-up MPI used for recalculations
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
+ *                 MBEDTLS_ERR_MPI_BAD_INPUT_DATA if N is negative or even or
+ *                 if E is negative
+ *
+ * \note           _RR is used to avoid re-computing R*R mod N across
+ *                 multiple calls, which speeds up things a bit. It can
+ *                 be set to NULL if the extra performance is unneeded.
+ */
+int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *E, const mbedtls_mpi *N, mbedtls_mpi *_RR );
+
+/**
+ * \brief          Fill an MPI X with size bytes of random
+ *
+ * \param X        Destination MPI
+ * \param size     Size in bytes
+ * \param f_rng    RNG function
+ * \param p_rng    RNG parameter
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
+ */
+int mbedtls_mpi_fill_random( mbedtls_mpi *X, size_t size,
+                     int (*f_rng)(void *, unsigned char *, size_t),
+                     void *p_rng );
+
+/**
+ * \brief          Greatest common divisor: G = gcd(A, B)
+ *
+ * \param G        Destination MPI
+ * \param A        Left-hand MPI
+ * \param B        Right-hand MPI
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed
+ */
+int mbedtls_mpi_gcd( mbedtls_mpi *G, const mbedtls_mpi *A, const mbedtls_mpi *B );
+
+/**
+ * \brief          Modular inverse: X = A^-1 mod N
+ *
+ * \param X        Destination MPI
+ * \param A        Left-hand MPI
+ * \param N        Right-hand MPI
+ *
+ * \return         0 if successful,
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
+ *                 MBEDTLS_ERR_MPI_BAD_INPUT_DATA if N is negative or nil
+                   MBEDTLS_ERR_MPI_NOT_ACCEPTABLE if A has no inverse mod N
+ */
+int mbedtls_mpi_inv_mod( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *N );
+
+/**
+ * \brief          Miller-Rabin primality test
+ *
+ * \param X        MPI to check
+ * \param f_rng    RNG function
+ * \param p_rng    RNG parameter
+ *
+ * \return         0 if successful (probably prime),
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
+ *                 MBEDTLS_ERR_MPI_NOT_ACCEPTABLE if X is not prime
+ */
+int mbedtls_mpi_is_prime( const mbedtls_mpi *X,
+                  int (*f_rng)(void *, unsigned char *, size_t),
+                  void *p_rng );
+
+/**
+ * \brief          Prime number generation
+ *
+ * \param X        Destination MPI
+ * \param nbits    Required size of X in bits
+ *                 ( 3 <= nbits <= MBEDTLS_MPI_MAX_BITS )
+ * \param dh_flag  If 1, then (X-1)/2 will be prime too
+ * \param f_rng    RNG function
+ * \param p_rng    RNG parameter
+ *
+ * \return         0 if successful (probably prime),
+ *                 MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed,
+ *                 MBEDTLS_ERR_MPI_BAD_INPUT_DATA if nbits is < 3
+ */
+int mbedtls_mpi_gen_prime( mbedtls_mpi *X, size_t nbits, int dh_flag,
+                   int (*f_rng)(void *, unsigned char *, size_t),
+                   void *p_rng );
+
+/**
+ * \brief          Checkup routine
+ *
+ * \return         0 if successful, or 1 if the test failed
+ */
+int mbedtls_mpi_self_test( int verbose );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* bignum.h */

+ 885 - 0
Pal/lib/crypto/mbedtls/mbedtls/bn_mul.h

@@ -0,0 +1,885 @@
+/**
+ * \file bn_mul.h
+ *
+ * \brief  Multi-precision integer library
+ *
+ *  Copyright (C) 2006-2015, ARM Limited, All Rights Reserved
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  This file is part of mbed TLS (https://tls.mbed.org)
+ */
+/*
+ *      Multiply source vector [s] with b, add result
+ *       to destination vector [d] and set carry c.
+ *
+ *      Currently supports:
+ *
+ *         . IA-32 (386+)         . AMD64 / EM64T
+ *         . IA-32 (SSE2)         . Motorola 68000
+ *         . PowerPC, 32-bit      . MicroBlaze
+ *         . PowerPC, 64-bit      . TriCore
+ *         . SPARC v8             . ARM v3+
+ *         . Alpha                . MIPS32
+ *         . C, longlong          . C, generic
+ */
+#ifndef MBEDTLS_BN_MUL_H
+#define MBEDTLS_BN_MUL_H
+
+#include "bignum.h"
+
+#if defined(MBEDTLS_HAVE_ASM)
+
+#ifndef asm
+#define asm __asm
+#endif
+
+/* armcc5 --gnu defines __GNUC__ but doesn't support GNU's extended asm */
+#if defined(__GNUC__) && \
+    ( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 )
+#if defined(__i386__)
+
+#define MULADDC_INIT                        \
+    asm(                                    \
+        "movl   %%ebx, %0           \n\t"   \
+        "movl   %5, %%esi           \n\t"   \
+        "movl   %6, %%edi           \n\t"   \
+        "movl   %7, %%ecx           \n\t"   \
+        "movl   %8, %%ebx           \n\t"
+
+#define MULADDC_CORE                        \
+        "lodsl                      \n\t"   \
+        "mull   %%ebx               \n\t"   \
+        "addl   %%ecx,   %%eax      \n\t"   \
+        "adcl   $0,      %%edx      \n\t"   \
+        "addl   (%%edi), %%eax      \n\t"   \
+        "adcl   $0,      %%edx      \n\t"   \
+        "movl   %%edx,   %%ecx      \n\t"   \
+        "stosl                      \n\t"
+
+#if defined(MBEDTLS_HAVE_SSE2)
+
+#define MULADDC_HUIT                            \
+        "movd     %%ecx,     %%mm1      \n\t"   \
+        "movd     %%ebx,     %%mm0      \n\t"   \
+        "movd     (%%edi),   %%mm3      \n\t"   \
+        "paddq    %%mm3,     %%mm1      \n\t"   \
+        "movd     (%%esi),   %%mm2      \n\t"   \
+        "pmuludq  %%mm0,     %%mm2      \n\t"   \
+        "movd     4(%%esi),  %%mm4      \n\t"   \
+        "pmuludq  %%mm0,     %%mm4      \n\t"   \
+        "movd     8(%%esi),  %%mm6      \n\t"   \
+        "pmuludq  %%mm0,     %%mm6      \n\t"   \
+        "movd     12(%%esi), %%mm7      \n\t"   \
+        "pmuludq  %%mm0,     %%mm7      \n\t"   \
+        "paddq    %%mm2,     %%mm1      \n\t"   \
+        "movd     4(%%edi),  %%mm3      \n\t"   \
+        "paddq    %%mm4,     %%mm3      \n\t"   \
+        "movd     8(%%edi),  %%mm5      \n\t"   \
+        "paddq    %%mm6,     %%mm5      \n\t"   \
+        "movd     12(%%edi), %%mm4      \n\t"   \
+        "paddq    %%mm4,     %%mm7      \n\t"   \
+        "movd     %%mm1,     (%%edi)    \n\t"   \
+        "movd     16(%%esi), %%mm2      \n\t"   \
+        "pmuludq  %%mm0,     %%mm2      \n\t"   \
+        "psrlq    $32,       %%mm1      \n\t"   \
+        "movd     20(%%esi), %%mm4      \n\t"   \
+        "pmuludq  %%mm0,     %%mm4      \n\t"   \
+        "paddq    %%mm3,     %%mm1      \n\t"   \
+        "movd     24(%%esi), %%mm6      \n\t"   \
+        "pmuludq  %%mm0,     %%mm6      \n\t"   \
+        "movd     %%mm1,     4(%%edi)   \n\t"   \
+        "psrlq    $32,       %%mm1      \n\t"   \
+        "movd     28(%%esi), %%mm3      \n\t"   \
+        "pmuludq  %%mm0,     %%mm3      \n\t"   \
+        "paddq    %%mm5,     %%mm1      \n\t"   \
+        "movd     16(%%edi), %%mm5      \n\t"   \
+        "paddq    %%mm5,     %%mm2      \n\t"   \
+        "movd     %%mm1,     8(%%edi)   \n\t"   \
+        "psrlq    $32,       %%mm1      \n\t"   \
+        "paddq    %%mm7,     %%mm1      \n\t"   \
+        "movd     20(%%edi), %%mm5      \n\t"   \
+        "paddq    %%mm5,     %%mm4      \n\t"   \
+        "movd     %%mm1,     12(%%edi)  \n\t"   \
+        "psrlq    $32,       %%mm1      \n\t"   \
+        "paddq    %%mm2,     %%mm1      \n\t"   \
+        "movd     24(%%edi), %%mm5      \n\t"   \
+        "paddq    %%mm5,     %%mm6      \n\t"   \
+        "movd     %%mm1,     16(%%edi)  \n\t"   \
+        "psrlq    $32,       %%mm1      \n\t"   \
+        "paddq    %%mm4,     %%mm1      \n\t"   \
+        "movd     28(%%edi), %%mm5      \n\t"   \
+        "paddq    %%mm5,     %%mm3      \n\t"   \
+        "movd     %%mm1,     20(%%edi)  \n\t"   \
+        "psrlq    $32,       %%mm1      \n\t"   \
+        "paddq    %%mm6,     %%mm1      \n\t"   \
+        "movd     %%mm1,     24(%%edi)  \n\t"   \
+        "psrlq    $32,       %%mm1      \n\t"   \
+        "paddq    %%mm3,     %%mm1      \n\t"   \
+        "movd     %%mm1,     28(%%edi)  \n\t"   \
+        "addl     $32,       %%edi      \n\t"   \
+        "addl     $32,       %%esi      \n\t"   \
+        "psrlq    $32,       %%mm1      \n\t"   \
+        "movd     %%mm1,     %%ecx      \n\t"
+
+#define MULADDC_STOP                    \
+        "emms                   \n\t"   \
+        "movl   %4, %%ebx       \n\t"   \
+        "movl   %%ecx, %1       \n\t"   \
+        "movl   %%edi, %2       \n\t"   \
+        "movl   %%esi, %3       \n\t"   \
+        : "=m" (t), "=m" (c), "=m" (d), "=m" (s)        \
+        : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b)   \
+        : "eax", "ecx", "edx", "esi", "edi"             \
+    );
+
+#else
+
+#define MULADDC_STOP                    \
+        "movl   %4, %%ebx       \n\t"   \
+        "movl   %%ecx, %1       \n\t"   \
+        "movl   %%edi, %2       \n\t"   \
+        "movl   %%esi, %3       \n\t"   \
+        : "=m" (t), "=m" (c), "=m" (d), "=m" (s)        \
+        : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b)   \
+        : "eax", "ecx", "edx", "esi", "edi"             \
+    );
+#endif /* SSE2 */
+#endif /* i386 */
+
+#if defined(__amd64__) || defined (__x86_64__)
+
+#define MULADDC_INIT                        \
+    asm(                                    \
+        "xorq   %%r8, %%r8          \n\t"
+
+#define MULADDC_CORE                        \
+        "movq   (%%rsi), %%rax      \n\t"   \
+        "mulq   %%rbx               \n\t"   \
+        "addq   $8,      %%rsi      \n\t"   \
+        "addq   %%rcx,   %%rax      \n\t"   \
+        "movq   %%r8,    %%rcx      \n\t"   \
+        "adcq   $0,      %%rdx      \n\t"   \
+        "nop                        \n\t"   \
+        "addq   %%rax,   (%%rdi)    \n\t"   \
+        "adcq   %%rdx,   %%rcx      \n\t"   \
+        "addq   $8,      %%rdi      \n\t"
+
+#define MULADDC_STOP                        \
+        : "+c" (c), "+D" (d), "+S" (s)      \
+        : "b" (b)                           \
+        : "rax", "rdx", "r8"                \
+    );
+
+#endif /* AMD64 */
+
+#if defined(__mc68020__) || defined(__mcpu32__)
+
+#define MULADDC_INIT                    \
+    asm(                                \
+        "movl   %3, %%a2        \n\t"   \
+        "movl   %4, %%a3        \n\t"   \
+        "movl   %5, %%d3        \n\t"   \
+        "movl   %6, %%d2        \n\t"   \
+        "moveq  #0, %%d0        \n\t"
+
+#define MULADDC_CORE                    \
+        "movel  %%a2@+, %%d1    \n\t"   \
+        "mulul  %%d2, %%d4:%%d1 \n\t"   \
+        "addl   %%d3, %%d1      \n\t"   \
+        "addxl  %%d0, %%d4      \n\t"   \
+        "moveq  #0,   %%d3      \n\t"   \
+        "addl   %%d1, %%a3@+    \n\t"   \
+        "addxl  %%d4, %%d3      \n\t"
+
+#define MULADDC_STOP                    \
+        "movl   %%d3, %0        \n\t"   \
+        "movl   %%a3, %1        \n\t"   \
+        "movl   %%a2, %2        \n\t"   \
+        : "=m" (c), "=m" (d), "=m" (s)              \
+        : "m" (s), "m" (d), "m" (c), "m" (b)        \
+        : "d0", "d1", "d2", "d3", "d4", "a2", "a3"  \
+    );
+
+#define MULADDC_HUIT                        \
+        "movel  %%a2@+,  %%d1       \n\t"   \
+        "mulul  %%d2,    %%d4:%%d1  \n\t"   \
+        "addxl  %%d3,    %%d1       \n\t"   \
+        "addxl  %%d0,    %%d4       \n\t"   \
+        "addl   %%d1,    %%a3@+     \n\t"   \
+        "movel  %%a2@+,  %%d1       \n\t"   \
+        "mulul  %%d2,    %%d3:%%d1  \n\t"   \
+        "addxl  %%d4,    %%d1       \n\t"   \
+        "addxl  %%d0,    %%d3       \n\t"   \
+        "addl   %%d1,    %%a3@+     \n\t"   \
+        "movel  %%a2@+,  %%d1       \n\t"   \
+        "mulul  %%d2,    %%d4:%%d1  \n\t"   \
+        "addxl  %%d3,    %%d1       \n\t"   \
+        "addxl  %%d0,    %%d4       \n\t"   \
+        "addl   %%d1,    %%a3@+     \n\t"   \
+        "movel  %%a2@+,  %%d1       \n\t"   \
+        "mulul  %%d2,    %%d3:%%d1  \n\t"   \
+        "addxl  %%d4,    %%d1       \n\t"   \
+        "addxl  %%d0,    %%d3       \n\t"   \
+        "addl   %%d1,    %%a3@+     \n\t"   \
+        "movel  %%a2@+,  %%d1       \n\t"   \
+        "mulul  %%d2,    %%d4:%%d1  \n\t"   \
+        "addxl  %%d3,    %%d1       \n\t"   \
+        "addxl  %%d0,    %%d4       \n\t"   \
+        "addl   %%d1,    %%a3@+     \n\t"   \
+        "movel  %%a2@+,  %%d1       \n\t"   \
+        "mulul  %%d2,    %%d3:%%d1  \n\t"   \
+        "addxl  %%d4,    %%d1       \n\t"   \
+        "addxl  %%d0,    %%d3       \n\t"   \
+        "addl   %%d1,    %%a3@+     \n\t"   \
+        "movel  %%a2@+,  %%d1       \n\t"   \
+        "mulul  %%d2,    %%d4:%%d1  \n\t"   \
+        "addxl  %%d3,    %%d1       \n\t"   \
+        "addxl  %%d0,    %%d4       \n\t"   \
+        "addl   %%d1,    %%a3@+     \n\t"   \
+        "movel  %%a2@+,  %%d1       \n\t"   \
+        "mulul  %%d2,    %%d3:%%d1  \n\t"   \
+        "addxl  %%d4,    %%d1       \n\t"   \
+        "addxl  %%d0,    %%d3       \n\t"   \
+        "addl   %%d1,    %%a3@+     \n\t"   \
+        "addxl  %%d0,    %%d3       \n\t"
+
+#endif /* MC68000 */
+
+#if defined(__powerpc64__) || defined(__ppc64__)
+
+#if defined(__MACH__) && defined(__APPLE__)
+
+#define MULADDC_INIT                        \
+    asm(                                    \
+        "ld     r3, %3              \n\t"   \
+        "ld     r4, %4              \n\t"   \
+        "ld     r5, %5              \n\t"   \
+        "ld     r6, %6              \n\t"   \
+        "addi   r3, r3, -8          \n\t"   \
+        "addi   r4, r4, -8          \n\t"   \
+        "addic  r5, r5,  0          \n\t"
+
+#define MULADDC_CORE                        \
+        "ldu    r7, 8(r3)           \n\t"   \
+        "mulld  r8, r7, r6          \n\t"   \
+        "mulhdu r9, r7, r6          \n\t"   \
+        "adde   r8, r8, r5          \n\t"   \
+        "ld     r7, 8(r4)           \n\t"   \
+        "addze  r5, r9              \n\t"   \
+        "addc   r8, r8, r7          \n\t"   \
+        "stdu   r8, 8(r4)           \n\t"
+
+#define MULADDC_STOP                        \
+        "addze  r5, r5              \n\t"   \
+        "addi   r4, r4, 8           \n\t"   \
+        "addi   r3, r3, 8           \n\t"   \
+        "std    r5, %0              \n\t"   \
+        "std    r4, %1              \n\t"   \
+        "std    r3, %2              \n\t"   \
+        : "=m" (c), "=m" (d), "=m" (s)              \
+        : "m" (s), "m" (d), "m" (c), "m" (b)        \
+        : "r3", "r4", "r5", "r6", "r7", "r8", "r9"  \
+    );
+
+
+#else /* __MACH__ && __APPLE__ */
+
+#define MULADDC_INIT                        \
+    asm(                                    \
+        "ld     %%r3, %3            \n\t"   \
+        "ld     %%r4, %4            \n\t"   \
+        "ld     %%r5, %5            \n\t"   \
+        "ld     %%r6, %6            \n\t"   \
+        "addi   %%r3, %%r3, -8      \n\t"   \
+        "addi   %%r4, %%r4, -8      \n\t"   \
+        "addic  %%r5, %%r5,  0      \n\t"
+
+#define MULADDC_CORE                        \
+        "ldu    %%r7, 8(%%r3)       \n\t"   \
+        "mulld  %%r8, %%r7, %%r6    \n\t"   \
+        "mulhdu %%r9, %%r7, %%r6    \n\t"   \
+        "adde   %%r8, %%r8, %%r5    \n\t"   \
+        "ld     %%r7, 8(%%r4)       \n\t"   \
+        "addze  %%r5, %%r9          \n\t"   \
+        "addc   %%r8, %%r8, %%r7    \n\t"   \
+        "stdu   %%r8, 8(%%r4)       \n\t"
+
+#define MULADDC_STOP                        \
+        "addze  %%r5, %%r5          \n\t"   \
+        "addi   %%r4, %%r4, 8       \n\t"   \
+        "addi   %%r3, %%r3, 8       \n\t"   \
+        "std    %%r5, %0            \n\t"   \
+        "std    %%r4, %1            \n\t"   \
+        "std    %%r3, %2            \n\t"   \
+        : "=m" (c), "=m" (d), "=m" (s)              \
+        : "m" (s), "m" (d), "m" (c), "m" (b)        \
+        : "r3", "r4", "r5", "r6", "r7", "r8", "r9"  \
+    );
+
+#endif /* __MACH__ && __APPLE__ */
+
+#elif defined(__powerpc__) || defined(__ppc__) /* end PPC64/begin PPC32  */
+
+#if defined(__MACH__) && defined(__APPLE__)
+
+#define MULADDC_INIT                    \
+    asm(                                \
+        "lwz    r3, %3          \n\t"   \
+        "lwz    r4, %4          \n\t"   \
+        "lwz    r5, %5          \n\t"   \
+        "lwz    r6, %6          \n\t"   \
+        "addi   r3, r3, -4      \n\t"   \
+        "addi   r4, r4, -4      \n\t"   \
+        "addic  r5, r5,  0      \n\t"
+
+#define MULADDC_CORE                    \
+        "lwzu   r7, 4(r3)       \n\t"   \
+        "mullw  r8, r7, r6      \n\t"   \
+        "mulhwu r9, r7, r6      \n\t"   \
+        "adde   r8, r8, r5      \n\t"   \
+        "lwz    r7, 4(r4)       \n\t"   \
+        "addze  r5, r9          \n\t"   \
+        "addc   r8, r8, r7      \n\t"   \
+        "stwu   r8, 4(r4)       \n\t"
+
+#define MULADDC_STOP                    \
+        "addze  r5, r5          \n\t"   \
+        "addi   r4, r4, 4       \n\t"   \
+        "addi   r3, r3, 4       \n\t"   \
+        "stw    r5, %0          \n\t"   \
+        "stw    r4, %1          \n\t"   \
+        "stw    r3, %2          \n\t"   \
+        : "=m" (c), "=m" (d), "=m" (s)              \
+        : "m" (s), "m" (d), "m" (c), "m" (b)        \
+        : "r3", "r4", "r5", "r6", "r7", "r8", "r9"  \
+    );
+
+#else /* __MACH__ && __APPLE__ */
+
+#define MULADDC_INIT                        \
+    asm(                                    \
+        "lwz    %%r3, %3            \n\t"   \
+        "lwz    %%r4, %4            \n\t"   \
+        "lwz    %%r5, %5            \n\t"   \
+        "lwz    %%r6, %6            \n\t"   \
+        "addi   %%r3, %%r3, -4      \n\t"   \
+        "addi   %%r4, %%r4, -4      \n\t"   \
+        "addic  %%r5, %%r5,  0      \n\t"
+
+#define MULADDC_CORE                        \
+        "lwzu   %%r7, 4(%%r3)       \n\t"   \
+        "mullw  %%r8, %%r7, %%r6    \n\t"   \
+        "mulhwu %%r9, %%r7, %%r6    \n\t"   \
+        "adde   %%r8, %%r8, %%r5    \n\t"   \
+        "lwz    %%r7, 4(%%r4)       \n\t"   \
+        "addze  %%r5, %%r9          \n\t"   \
+        "addc   %%r8, %%r8, %%r7    \n\t"   \
+        "stwu   %%r8, 4(%%r4)       \n\t"
+
+#define MULADDC_STOP                        \
+        "addze  %%r5, %%r5          \n\t"   \
+        "addi   %%r4, %%r4, 4       \n\t"   \
+        "addi   %%r3, %%r3, 4       \n\t"   \
+        "stw    %%r5, %0            \n\t"   \
+        "stw    %%r4, %1            \n\t"   \
+        "stw    %%r3, %2            \n\t"   \
+        : "=m" (c), "=m" (d), "=m" (s)              \
+        : "m" (s), "m" (d), "m" (c), "m" (b)        \
+        : "r3", "r4", "r5", "r6", "r7", "r8", "r9"  \
+    );
+
+#endif /* __MACH__ && __APPLE__ */
+
+#endif /* PPC32 */
+
+/*
+ * The Sparc(64) assembly is reported to be broken.
+ * Disable it for now, until we're able to fix it.
+ */
+#if 0 && defined(__sparc__)
+#if defined(__sparc64__)
+
+#define MULADDC_INIT                                    \
+    asm(                                                \
+                "ldx     %3, %%o0               \n\t"   \
+                "ldx     %4, %%o1               \n\t"   \
+                "ld      %5, %%o2               \n\t"   \
+                "ld      %6, %%o3               \n\t"
+
+#define MULADDC_CORE                                    \
+                "ld      [%%o0], %%o4           \n\t"   \
+                "inc     4, %%o0                \n\t"   \
+                "ld      [%%o1], %%o5           \n\t"   \
+                "umul    %%o3, %%o4, %%o4       \n\t"   \
+                "addcc   %%o4, %%o2, %%o4       \n\t"   \
+                "rd      %%y, %%g1              \n\t"   \
+                "addx    %%g1, 0, %%g1          \n\t"   \
+                "addcc   %%o4, %%o5, %%o4       \n\t"   \
+                "st      %%o4, [%%o1]           \n\t"   \
+                "addx    %%g1, 0, %%o2          \n\t"   \
+                "inc     4, %%o1                \n\t"
+
+        #define MULADDC_STOP                            \
+                "st      %%o2, %0               \n\t"   \
+                "stx     %%o1, %1               \n\t"   \
+                "stx     %%o0, %2               \n\t"   \
+        : "=m" (c), "=m" (d), "=m" (s)          \
+        : "m" (s), "m" (d), "m" (c), "m" (b)    \
+        : "g1", "o0", "o1", "o2", "o3", "o4",   \
+          "o5"                                  \
+        );
+
+#else /* __sparc64__ */
+
+#define MULADDC_INIT                                    \
+    asm(                                                \
+                "ld      %3, %%o0               \n\t"   \
+                "ld      %4, %%o1               \n\t"   \
+                "ld      %5, %%o2               \n\t"   \
+                "ld      %6, %%o3               \n\t"
+
+#define MULADDC_CORE                                    \
+                "ld      [%%o0], %%o4           \n\t"   \
+                "inc     4, %%o0                \n\t"   \
+                "ld      [%%o1], %%o5           \n\t"   \
+                "umul    %%o3, %%o4, %%o4       \n\t"   \
+                "addcc   %%o4, %%o2, %%o4       \n\t"   \
+                "rd      %%y, %%g1              \n\t"   \
+                "addx    %%g1, 0, %%g1          \n\t"   \
+                "addcc   %%o4, %%o5, %%o4       \n\t"   \
+                "st      %%o4, [%%o1]           \n\t"   \
+                "addx    %%g1, 0, %%o2          \n\t"   \
+                "inc     4, %%o1                \n\t"
+
+#define MULADDC_STOP                                    \
+                "st      %%o2, %0               \n\t"   \
+                "st      %%o1, %1               \n\t"   \
+                "st      %%o0, %2               \n\t"   \
+        : "=m" (c), "=m" (d), "=m" (s)          \
+        : "m" (s), "m" (d), "m" (c), "m" (b)    \
+        : "g1", "o0", "o1", "o2", "o3", "o4",   \
+          "o5"                                  \
+        );
+
+#endif /* __sparc64__ */
+#endif /* __sparc__ */
+
+#if defined(__microblaze__) || defined(microblaze)
+
+#define MULADDC_INIT                    \
+    asm(                                \
+        "lwi   r3,   %3         \n\t"   \
+        "lwi   r4,   %4         \n\t"   \
+        "lwi   r5,   %5         \n\t"   \
+        "lwi   r6,   %6         \n\t"   \
+        "andi  r7,   r6, 0xffff \n\t"   \
+        "bsrli r6,   r6, 16     \n\t"
+
+#define MULADDC_CORE                    \
+        "lhui  r8,   r3,   0    \n\t"   \
+        "addi  r3,   r3,   2    \n\t"   \
+        "lhui  r9,   r3,   0    \n\t"   \
+        "addi  r3,   r3,   2    \n\t"   \
+        "mul   r10,  r9,  r6    \n\t"   \
+        "mul   r11,  r8,  r7    \n\t"   \
+        "mul   r12,  r9,  r7    \n\t"   \
+        "mul   r13,  r8,  r6    \n\t"   \
+        "bsrli  r8, r10,  16    \n\t"   \
+        "bsrli  r9, r11,  16    \n\t"   \
+        "add   r13, r13,  r8    \n\t"   \
+        "add   r13, r13,  r9    \n\t"   \
+        "bslli r10, r10,  16    \n\t"   \
+        "bslli r11, r11,  16    \n\t"   \
+        "add   r12, r12, r10    \n\t"   \
+        "addc  r13, r13,  r0    \n\t"   \
+        "add   r12, r12, r11    \n\t"   \
+        "addc  r13, r13,  r0    \n\t"   \
+        "lwi   r10,  r4,   0    \n\t"   \
+        "add   r12, r12, r10    \n\t"   \
+        "addc  r13, r13,  r0    \n\t"   \
+        "add   r12, r12,  r5    \n\t"   \
+        "addc   r5, r13,  r0    \n\t"   \
+        "swi   r12,  r4,   0    \n\t"   \
+        "addi   r4,  r4,   4    \n\t"
+
+#define MULADDC_STOP                    \
+        "swi   r5,   %0         \n\t"   \
+        "swi   r4,   %1         \n\t"   \
+        "swi   r3,   %2         \n\t"   \
+        : "=m" (c), "=m" (d), "=m" (s)              \
+        : "m" (s), "m" (d), "m" (c), "m" (b)        \
+        : "r3", "r4"  "r5", "r6", "r7", "r8",       \
+          "r9", "r10", "r11", "r12", "r13"          \
+    );
+
+#endif /* MicroBlaze */
+
+#if defined(__tricore__)
+
+#define MULADDC_INIT                            \
+    asm(                                        \
+        "ld.a   %%a2, %3                \n\t"   \
+        "ld.a   %%a3, %4                \n\t"   \
+        "ld.w   %%d4, %5                \n\t"   \
+        "ld.w   %%d1, %6                \n\t"   \
+        "xor    %%d5, %%d5              \n\t"
+
+#define MULADDC_CORE                            \
+        "ld.w   %%d0,   [%%a2+]         \n\t"   \
+        "madd.u %%e2, %%e4, %%d0, %%d1  \n\t"   \
+        "ld.w   %%d0,   [%%a3]          \n\t"   \
+        "addx   %%d2,    %%d2,  %%d0    \n\t"   \
+        "addc   %%d3,    %%d3,    0     \n\t"   \
+        "mov    %%d4,    %%d3           \n\t"   \
+        "st.w  [%%a3+],  %%d2           \n\t"
+
+#define MULADDC_STOP                            \
+        "st.w   %0, %%d4                \n\t"   \
+        "st.a   %1, %%a3                \n\t"   \
+        "st.a   %2, %%a2                \n\t"   \
+        : "=m" (c), "=m" (d), "=m" (s)          \
+        : "m" (s), "m" (d), "m" (c), "m" (b)    \
+        : "d0", "d1", "e2", "d4", "a2", "a3"    \
+    );
+
+#endif /* TriCore */
+
+/*
+ * gcc -O0 by default uses r7 for the frame pointer, so it complains about our
+ * use of r7 below, unless -fomit-frame-pointer is passed. Unfortunately,
+ * passing that option is not easy when building with yotta.
+ *
+ * On the other hand, -fomit-frame-pointer is implied by any -Ox options with
+ * x !=0, which we can detect using __OPTIMIZE__ (which is also defined by
+ * clang and armcc5 under the same conditions).
+ *
+ * So, only use the optimized assembly below for optimized build, which avoids
+ * the build error and is pretty reasonable anyway.
+ */
+#if defined(__GNUC__) && !defined(__OPTIMIZE__)
+#define MULADDC_CANNOT_USE_R7
+#endif
+
+#if defined(__arm__) && !defined(MULADDC_CANNOT_USE_R7)
+
+#if defined(__thumb__) && !defined(__thumb2__)
+
+#define MULADDC_INIT                                    \
+    asm(                                                \
+            "ldr    r0, %3                      \n\t"   \
+            "ldr    r1, %4                      \n\t"   \
+            "ldr    r2, %5                      \n\t"   \
+            "ldr    r3, %6                      \n\t"   \
+            "lsr    r7, r3, #16                 \n\t"   \
+            "mov    r9, r7                      \n\t"   \
+            "lsl    r7, r3, #16                 \n\t"   \
+            "lsr    r7, r7, #16                 \n\t"   \
+            "mov    r8, r7                      \n\t"
+
+#define MULADDC_CORE                                    \
+            "ldmia  r0!, {r6}                   \n\t"   \
+            "lsr    r7, r6, #16                 \n\t"   \
+            "lsl    r6, r6, #16                 \n\t"   \
+            "lsr    r6, r6, #16                 \n\t"   \
+            "mov    r4, r8                      \n\t"   \
+            "mul    r4, r6                      \n\t"   \
+            "mov    r3, r9                      \n\t"   \
+            "mul    r6, r3                      \n\t"   \
+            "mov    r5, r9                      \n\t"   \
+            "mul    r5, r7                      \n\t"   \
+            "mov    r3, r8                      \n\t"   \
+            "mul    r7, r3                      \n\t"   \
+            "lsr    r3, r6, #16                 \n\t"   \
+            "add    r5, r5, r3                  \n\t"   \
+            "lsr    r3, r7, #16                 \n\t"   \
+            "add    r5, r5, r3                  \n\t"   \
+            "add    r4, r4, r2                  \n\t"   \
+            "mov    r2, #0                      \n\t"   \
+            "adc    r5, r2                      \n\t"   \
+            "lsl    r3, r6, #16                 \n\t"   \
+            "add    r4, r4, r3                  \n\t"   \
+            "adc    r5, r2                      \n\t"   \
+            "lsl    r3, r7, #16                 \n\t"   \
+            "add    r4, r4, r3                  \n\t"   \
+            "adc    r5, r2                      \n\t"   \
+            "ldr    r3, [r1]                    \n\t"   \
+            "add    r4, r4, r3                  \n\t"   \
+            "adc    r2, r5                      \n\t"   \
+            "stmia  r1!, {r4}                   \n\t"
+
+#define MULADDC_STOP                                    \
+            "str    r2, %0                      \n\t"   \
+            "str    r1, %1                      \n\t"   \
+            "str    r0, %2                      \n\t"   \
+         : "=m" (c),  "=m" (d), "=m" (s)        \
+         : "m" (s), "m" (d), "m" (c), "m" (b)   \
+         : "r0", "r1", "r2", "r3", "r4", "r5",  \
+           "r6", "r7", "r8", "r9", "cc"         \
+         );
+
+#else
+
+#define MULADDC_INIT                                    \
+    asm(                                                \
+            "ldr    r0, %3                      \n\t"   \
+            "ldr    r1, %4                      \n\t"   \
+            "ldr    r2, %5                      \n\t"   \
+            "ldr    r3, %6                      \n\t"
+
+#define MULADDC_CORE                                    \
+            "ldr    r4, [r0], #4                \n\t"   \
+            "mov    r5, #0                      \n\t"   \
+            "ldr    r6, [r1]                    \n\t"   \
+            "umlal  r2, r5, r3, r4              \n\t"   \
+            "adds   r7, r6, r2                  \n\t"   \
+            "adc    r2, r5, #0                  \n\t"   \
+            "str    r7, [r1], #4                \n\t"
+
+#define MULADDC_STOP                                    \
+            "str    r2, %0                      \n\t"   \
+            "str    r1, %1                      \n\t"   \
+            "str    r0, %2                      \n\t"   \
+         : "=m" (c),  "=m" (d), "=m" (s)        \
+         : "m" (s), "m" (d), "m" (c), "m" (b)   \
+         : "r0", "r1", "r2", "r3", "r4", "r5",  \
+           "r6", "r7", "cc"                     \
+         );
+
+#endif /* Thumb */
+
+#endif /* ARMv3 */
+
+#if defined(__alpha__)
+
+#define MULADDC_INIT                    \
+    asm(                                \
+        "ldq    $1, %3          \n\t"   \
+        "ldq    $2, %4          \n\t"   \
+        "ldq    $3, %5          \n\t"   \
+        "ldq    $4, %6          \n\t"
+
+#define MULADDC_CORE                    \
+        "ldq    $6,  0($1)      \n\t"   \
+        "addq   $1,  8, $1      \n\t"   \
+        "mulq   $6, $4, $7      \n\t"   \
+        "umulh  $6, $4, $6      \n\t"   \
+        "addq   $7, $3, $7      \n\t"   \
+        "cmpult $7, $3, $3      \n\t"   \
+        "ldq    $5,  0($2)      \n\t"   \
+        "addq   $7, $5, $7      \n\t"   \
+        "cmpult $7, $5, $5      \n\t"   \
+        "stq    $7,  0($2)      \n\t"   \
+        "addq   $2,  8, $2      \n\t"   \
+        "addq   $6, $3, $3      \n\t"   \
+        "addq   $5, $3, $3      \n\t"
+
+#define MULADDC_STOP                                    \
+        "stq    $3, %0          \n\t"   \
+        "stq    $2, %1          \n\t"   \
+        "stq    $1, %2          \n\t"   \
+        : "=m" (c), "=m" (d), "=m" (s)              \
+        : "m" (s), "m" (d), "m" (c), "m" (b)        \
+        : "$1", "$2", "$3", "$4", "$5", "$6", "$7"  \
+    );
+#endif /* Alpha */
+
+#if defined(__mips__) && !defined(__mips64)
+
+#define MULADDC_INIT                    \
+    asm(                                \
+        "lw     $10, %3         \n\t"   \
+        "lw     $11, %4         \n\t"   \
+        "lw     $12, %5         \n\t"   \
+        "lw     $13, %6         \n\t"
+
+#define MULADDC_CORE                    \
+        "lw     $14, 0($10)     \n\t"   \
+        "multu  $13, $14        \n\t"   \
+        "addi   $10, $10, 4     \n\t"   \
+        "mflo   $14             \n\t"   \
+        "mfhi   $9              \n\t"   \
+        "addu   $14, $12, $14   \n\t"   \
+        "lw     $15, 0($11)     \n\t"   \
+        "sltu   $12, $14, $12   \n\t"   \
+        "addu   $15, $14, $15   \n\t"   \
+        "sltu   $14, $15, $14   \n\t"   \
+        "addu   $12, $12, $9    \n\t"   \
+        "sw     $15, 0($11)     \n\t"   \
+        "addu   $12, $12, $14   \n\t"   \
+        "addi   $11, $11, 4     \n\t"
+
+#define MULADDC_STOP                    \
+        "sw     $12, %0         \n\t"   \
+        "sw     $11, %1         \n\t"   \
+        "sw     $10, %2         \n\t"   \
+        : "=m" (c), "=m" (d), "=m" (s)                      \
+        : "m" (s), "m" (d), "m" (c), "m" (b)                \
+        : "$9", "$10", "$11", "$12", "$13", "$14", "$15"    \
+    );
+
+#endif /* MIPS */
+#endif /* GNUC */
+
+#if (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
+
+#define MULADDC_INIT                            \
+    __asm   mov     esi, s                      \
+    __asm   mov     edi, d                      \
+    __asm   mov     ecx, c                      \
+    __asm   mov     ebx, b
+
+#define MULADDC_CORE                            \
+    __asm   lodsd                               \
+    __asm   mul     ebx                         \
+    __asm   add     eax, ecx                    \
+    __asm   adc     edx, 0                      \
+    __asm   add     eax, [edi]                  \
+    __asm   adc     edx, 0                      \
+    __asm   mov     ecx, edx                    \
+    __asm   stosd
+
+#if defined(MBEDTLS_HAVE_SSE2)
+
+#define EMIT __asm _emit
+
+#define MULADDC_HUIT                            \
+    EMIT 0x0F  EMIT 0x6E  EMIT 0xC9             \
+    EMIT 0x0F  EMIT 0x6E  EMIT 0xC3             \
+    EMIT 0x0F  EMIT 0x6E  EMIT 0x1F             \
+    EMIT 0x0F  EMIT 0xD4  EMIT 0xCB             \
+    EMIT 0x0F  EMIT 0x6E  EMIT 0x16             \
+    EMIT 0x0F  EMIT 0xF4  EMIT 0xD0             \
+    EMIT 0x0F  EMIT 0x6E  EMIT 0x66  EMIT 0x04  \
+    EMIT 0x0F  EMIT 0xF4  EMIT 0xE0             \
+    EMIT 0x0F  EMIT 0x6E  EMIT 0x76  EMIT 0x08  \
+    EMIT 0x0F  EMIT 0xF4  EMIT 0xF0             \
+    EMIT 0x0F  EMIT 0x6E  EMIT 0x7E  EMIT 0x0C  \
+    EMIT 0x0F  EMIT 0xF4  EMIT 0xF8             \
+    EMIT 0x0F  EMIT 0xD4  EMIT 0xCA             \
+    EMIT 0x0F  EMIT 0x6E  EMIT 0x5F  EMIT 0x04  \
+    EMIT 0x0F  EMIT 0xD4  EMIT 0xDC             \
+    EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x08  \
+    EMIT 0x0F  EMIT 0xD4  EMIT 0xEE             \
+    EMIT 0x0F  EMIT 0x6E  EMIT 0x67  EMIT 0x0C  \
+    EMIT 0x0F  EMIT 0xD4  EMIT 0xFC             \
+    EMIT 0x0F  EMIT 0x7E  EMIT 0x0F             \
+    EMIT 0x0F  EMIT 0x6E  EMIT 0x56  EMIT 0x10  \
+    EMIT 0x0F  EMIT 0xF4  EMIT 0xD0             \
+    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
+    EMIT 0x0F  EMIT 0x6E  EMIT 0x66  EMIT 0x14  \
+    EMIT 0x0F  EMIT 0xF4  EMIT 0xE0             \
+    EMIT 0x0F  EMIT 0xD4  EMIT 0xCB             \
+    EMIT 0x0F  EMIT 0x6E  EMIT 0x76  EMIT 0x18  \
+    EMIT 0x0F  EMIT 0xF4  EMIT 0xF0             \
+    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x04  \
+    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
+    EMIT 0x0F  EMIT 0x6E  EMIT 0x5E  EMIT 0x1C  \
+    EMIT 0x0F  EMIT 0xF4  EMIT 0xD8             \
+    EMIT 0x0F  EMIT 0xD4  EMIT 0xCD             \
+    EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x10  \
+    EMIT 0x0F  EMIT 0xD4  EMIT 0xD5             \
+    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x08  \
+    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
+    EMIT 0x0F  EMIT 0xD4  EMIT 0xCF             \
+    EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x14  \
+    EMIT 0x0F  EMIT 0xD4  EMIT 0xE5             \
+    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x0C  \
+    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
+    EMIT 0x0F  EMIT 0xD4  EMIT 0xCA             \
+    EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x18  \
+    EMIT 0x0F  EMIT 0xD4  EMIT 0xF5             \
+    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x10  \
+    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
+    EMIT 0x0F  EMIT 0xD4  EMIT 0xCC             \
+    EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x1C  \
+    EMIT 0x0F  EMIT 0xD4  EMIT 0xDD             \
+    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x14  \
+    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
+    EMIT 0x0F  EMIT 0xD4  EMIT 0xCE             \
+    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x18  \
+    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
+    EMIT 0x0F  EMIT 0xD4  EMIT 0xCB             \
+    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x1C  \
+    EMIT 0x83  EMIT 0xC7  EMIT 0x20             \
+    EMIT 0x83  EMIT 0xC6  EMIT 0x20             \
+    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
+    EMIT 0x0F  EMIT 0x7E  EMIT 0xC9
+
+#define MULADDC_STOP                            \
+    EMIT 0x0F  EMIT 0x77                        \
+    __asm   mov     c, ecx                      \
+    __asm   mov     d, edi                      \
+    __asm   mov     s, esi                      \
+
+#else
+
+#define MULADDC_STOP                            \
+    __asm   mov     c, ecx                      \
+    __asm   mov     d, edi                      \
+    __asm   mov     s, esi                      \
+
+#endif /* SSE2 */
+#endif /* MSVC */
+
+#endif /* MBEDTLS_HAVE_ASM */
+
+#if !defined(MULADDC_CORE)
+#if defined(MBEDTLS_HAVE_UDBL)
+
+#define MULADDC_INIT                    \
+{                                       \
+    mbedtls_t_udbl r;                           \
+    mbedtls_mpi_uint r0, r1;
+
+#define MULADDC_CORE                    \
+    r   = *(s++) * (mbedtls_t_udbl) b;          \
+    r0  = (mbedtls_mpi_uint) r;                   \
+    r1  = (mbedtls_mpi_uint)( r >> biL );         \
+    r0 += c;  r1 += (r0 <  c);          \
+    r0 += *d; r1 += (r0 < *d);          \
+    c = r1; *(d++) = r0;
+
+#define MULADDC_STOP                    \
+}
+
+#else
+#define MULADDC_INIT                    \
+{                                       \
+    mbedtls_mpi_uint s0, s1, b0, b1;              \
+    mbedtls_mpi_uint r0, r1, rx, ry;              \
+    b0 = ( b << biH ) >> biH;           \
+    b1 = ( b >> biH );
+
+#define MULADDC_CORE                    \
+    s0 = ( *s << biH ) >> biH;          \
+    s1 = ( *s >> biH ); s++;            \
+    rx = s0 * b1; r0 = s0 * b0;         \
+    ry = s1 * b0; r1 = s1 * b1;         \
+    r1 += ( rx >> biH );                \
+    r1 += ( ry >> biH );                \
+    rx <<= biH; ry <<= biH;             \
+    r0 += rx; r1 += (r0 < rx);          \
+    r0 += ry; r1 += (r0 < ry);          \
+    r0 +=  c; r1 += (r0 <  c);          \
+    r0 += *d; r1 += (r0 < *d);          \
+    c = r1; *(d++) = r0;
+
+#define MULADDC_STOP                    \
+}
+
+#endif /* C (generic)  */
+#endif /* C (longlong) */
+
+#endif /* bn_mul.h */

+ 3 - 0
Pal/lib/crypto/mbedtls/mbedtls/config.h

@@ -18,6 +18,9 @@
 #ifndef MBEDTLS_CONFIG_H
 #ifndef MBEDTLS_CONFIG_H
 #define MBEDTLS_CONFIG_H
 #define MBEDTLS_CONFIG_H
 
 
+#define MBEDTLS_BIGNUM_C
+#define MBEDTLS_DHM_C
 #define MBEDTLS_SHA256_C
 #define MBEDTLS_SHA256_C
+#define MBEDTLS_PLATFORM_C
 
 
 #endif
 #endif

+ 305 - 0
Pal/lib/crypto/mbedtls/mbedtls/dhm.h

@@ -0,0 +1,305 @@
+/**
+ * \file dhm.h
+ *
+ * \brief Diffie-Hellman-Merkle key exchange
+ *
+ *  Copyright (C) 2006-2015, ARM Limited, All Rights Reserved
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  This file is part of mbed TLS (https://tls.mbed.org)
+ */
+#ifndef MBEDTLS_DHM_H
+#define MBEDTLS_DHM_H
+
+#include "bignum.h"
+
+/*
+ * DHM Error codes
+ */
+#define MBEDTLS_ERR_DHM_BAD_INPUT_DATA                    -0x3080  /**< Bad input parameters to function. */
+#define MBEDTLS_ERR_DHM_READ_PARAMS_FAILED                -0x3100  /**< Reading of the DHM parameters failed. */
+#define MBEDTLS_ERR_DHM_MAKE_PARAMS_FAILED                -0x3180  /**< Making of the DHM parameters failed. */
+#define MBEDTLS_ERR_DHM_READ_PUBLIC_FAILED                -0x3200  /**< Reading of the public values failed. */
+#define MBEDTLS_ERR_DHM_MAKE_PUBLIC_FAILED                -0x3280  /**< Making of the public value failed. */
+#define MBEDTLS_ERR_DHM_CALC_SECRET_FAILED                -0x3300  /**< Calculation of the DHM secret failed. */
+#define MBEDTLS_ERR_DHM_INVALID_FORMAT                    -0x3380  /**< The ASN.1 data is not formatted correctly. */
+#define MBEDTLS_ERR_DHM_ALLOC_FAILED                      -0x3400  /**< Allocation of memory failed. */
+#define MBEDTLS_ERR_DHM_FILE_IO_ERROR                     -0x3480  /**< Read/write of file failed. */
+
+/**
+ * RFC 3526 defines a number of standardized Diffie-Hellman groups
+ * for IKE.
+ * RFC 5114 defines a number of standardized Diffie-Hellman groups
+ * that can be used.
+ *
+ * Some are included here for convenience.
+ *
+ * Included are:
+ *  RFC 3526 3.    2048-bit MODP Group
+ *  RFC 3526 4.    3072-bit MODP Group
+ *  RFC 3526 5.    4096-bit MODP Group
+ *  RFC 5114 2.2.  2048-bit MODP Group with 224-bit Prime Order Subgroup
+ */
+#define MBEDTLS_DHM_RFC3526_MODP_2048_P               \
+    "FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1" \
+    "29024E088A67CC74020BBEA63B139B22514A08798E3404DD" \
+    "EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245" \
+    "E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED" \
+    "EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D" \
+    "C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F" \
+    "83655D23DCA3AD961C62F356208552BB9ED529077096966D" \
+    "670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B" \
+    "E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9" \
+    "DE2BCBF6955817183995497CEA956AE515D2261898FA0510" \
+    "15728E5A8AACAA68FFFFFFFFFFFFFFFF"
+
+#define MBEDTLS_DHM_RFC3526_MODP_2048_G          "02"
+
+#define MBEDTLS_DHM_RFC3526_MODP_3072_P               \
+    "FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1" \
+    "29024E088A67CC74020BBEA63B139B22514A08798E3404DD" \
+    "EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245" \
+    "E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED" \
+    "EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D" \
+    "C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F" \
+    "83655D23DCA3AD961C62F356208552BB9ED529077096966D" \
+    "670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B" \
+    "E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9" \
+    "DE2BCBF6955817183995497CEA956AE515D2261898FA0510" \
+    "15728E5A8AAAC42DAD33170D04507A33A85521ABDF1CBA64" \
+    "ECFB850458DBEF0A8AEA71575D060C7DB3970F85A6E1E4C7" \
+    "ABF5AE8CDB0933D71E8C94E04A25619DCEE3D2261AD2EE6B" \
+    "F12FFA06D98A0864D87602733EC86A64521F2B18177B200C" \
+    "BBE117577A615D6C770988C0BAD946E208E24FA074E5AB31" \
+    "43DB5BFCE0FD108E4B82D120A93AD2CAFFFFFFFFFFFFFFFF"
+
+#define MBEDTLS_DHM_RFC3526_MODP_3072_G          "02"
+
+#define MBEDTLS_DHM_RFC3526_MODP_4096_P                \
+    "FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1" \
+    "29024E088A67CC74020BBEA63B139B22514A08798E3404DD" \
+    "EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245" \
+    "E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED" \
+    "EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D" \
+    "C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F" \
+    "83655D23DCA3AD961C62F356208552BB9ED529077096966D" \
+    "670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B" \
+    "E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9" \
+    "DE2BCBF6955817183995497CEA956AE515D2261898FA0510" \
+    "15728E5A8AAAC42DAD33170D04507A33A85521ABDF1CBA64" \
+    "ECFB850458DBEF0A8AEA71575D060C7DB3970F85A6E1E4C7" \
+    "ABF5AE8CDB0933D71E8C94E04A25619DCEE3D2261AD2EE6B" \
+    "F12FFA06D98A0864D87602733EC86A64521F2B18177B200C" \
+    "BBE117577A615D6C770988C0BAD946E208E24FA074E5AB31" \
+    "43DB5BFCE0FD108E4B82D120A92108011A723C12A787E6D7" \
+    "88719A10BDBA5B2699C327186AF4E23C1A946834B6150BDA" \
+    "2583E9CA2AD44CE8DBBBC2DB04DE8EF92E8EFC141FBECAA6" \
+    "287C59474E6BC05D99B2964FA090C3A2233BA186515BE7ED" \
+    "1F612970CEE2D7AFB81BDD762170481CD0069127D5B05AA9" \
+    "93B4EA988D8FDDC186FFB7DC90A6C08F4DF435C934063199" \
+    "FFFFFFFFFFFFFFFF"
+
+#define MBEDTLS_DHM_RFC3526_MODP_4096_G          "02"
+
+#define MBEDTLS_DHM_RFC5114_MODP_2048_P               \
+    "AD107E1E9123A9D0D660FAA79559C51FA20D64E5683B9FD1" \
+    "B54B1597B61D0A75E6FA141DF95A56DBAF9A3C407BA1DF15" \
+    "EB3D688A309C180E1DE6B85A1274A0A66D3F8152AD6AC212" \
+    "9037C9EDEFDA4DF8D91E8FEF55B7394B7AD5B7D0B6C12207" \
+    "C9F98D11ED34DBF6C6BA0B2C8BBC27BE6A00E0A0B9C49708" \
+    "B3BF8A317091883681286130BC8985DB1602E714415D9330" \
+    "278273C7DE31EFDC7310F7121FD5A07415987D9ADC0A486D" \
+    "CDF93ACC44328387315D75E198C641A480CD86A1B9E587E8" \
+    "BE60E69CC928B2B9C52172E413042E9B23F10B0E16E79763" \
+    "C9B53DCF4BA80A29E3FB73C16B8E75B97EF363E2FFA31F71" \
+    "CF9DE5384E71B81C0AC4DFFE0C10E64F"
+
+#define MBEDTLS_DHM_RFC5114_MODP_2048_G              \
+    "AC4032EF4F2D9AE39DF30B5C8FFDAC506CDEBE7B89998CAF"\
+    "74866A08CFE4FFE3A6824A4E10B9A6F0DD921F01A70C4AFA"\
+    "AB739D7700C29F52C57DB17C620A8652BE5E9001A8D66AD7"\
+    "C17669101999024AF4D027275AC1348BB8A762D0521BC98A"\
+    "E247150422EA1ED409939D54DA7460CDB5F6C6B250717CBE"\
+    "F180EB34118E98D119529A45D6F834566E3025E316A330EF"\
+    "BB77A86F0C1AB15B051AE3D428C8F8ACB70A8137150B8EEB"\
+    "10E183EDD19963DDD9E263E4770589EF6AA21E7F5F2FF381"\
+    "B539CCE3409D13CD566AFBB48D6C019181E1BCFE94B30269"\
+    "EDFE72FE9B6AA4BD7B5A0F1C71CFFF4C19C418E1F6EC0179"\
+    "81BC087F2A7065B384B890D3191F2BFA"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * \brief          DHM context structure
+ */
+typedef struct
+{
+    size_t len; /*!<  size(P) in chars  */
+    mbedtls_mpi P;      /*!<  prime modulus     */
+    mbedtls_mpi G;      /*!<  generator         */
+    mbedtls_mpi X;      /*!<  secret value      */
+    mbedtls_mpi GX;     /*!<  self = G^X mod P  */
+    mbedtls_mpi GY;     /*!<  peer = G^Y mod P  */
+    mbedtls_mpi K;      /*!<  key = GY^X mod P  */
+    mbedtls_mpi RP;     /*!<  cached R^2 mod P  */
+    mbedtls_mpi Vi;     /*!<  blinding value    */
+    mbedtls_mpi Vf;     /*!<  un-blinding value */
+    mbedtls_mpi pX;     /*!<  previous X        */
+}
+mbedtls_dhm_context;
+
+/**
+ * \brief          Initialize DHM context
+ *
+ * \param ctx      DHM context to be initialized
+ */
+void mbedtls_dhm_init( mbedtls_dhm_context *ctx );
+
+/**
+ * \brief          Parse the ServerKeyExchange parameters
+ *
+ * \param ctx      DHM context
+ * \param p        &(start of input buffer)
+ * \param end      end of buffer
+ *
+ * \return         0 if successful, or an MBEDTLS_ERR_DHM_XXX error code
+ */
+int mbedtls_dhm_read_params( mbedtls_dhm_context *ctx,
+                     unsigned char **p,
+                     const unsigned char *end );
+
+/**
+ * \brief          Setup and write the ServerKeyExchange parameters
+ *
+ * \param ctx      DHM context
+ * \param x_size   private value size in bytes
+ * \param output   destination buffer
+ * \param olen     number of chars written
+ * \param f_rng    RNG function
+ * \param p_rng    RNG parameter
+ *
+ * \note           This function assumes that ctx->P and ctx->G
+ *                 have already been properly set (for example
+ *                 using mbedtls_mpi_read_string or mbedtls_mpi_read_binary).
+ *
+ * \return         0 if successful, or an MBEDTLS_ERR_DHM_XXX error code
+ */
+int mbedtls_dhm_make_params( mbedtls_dhm_context *ctx, int x_size,
+                     unsigned char *output, size_t *olen,
+                     int (*f_rng)(void *, unsigned char *, size_t),
+                     void *p_rng );
+
+/**
+ * \brief          Import the peer's public value G^Y
+ *
+ * \param ctx      DHM context
+ * \param input    input buffer
+ * \param ilen     size of buffer
+ *
+ * \return         0 if successful, or an MBEDTLS_ERR_DHM_XXX error code
+ */
+int mbedtls_dhm_read_public( mbedtls_dhm_context *ctx,
+                     const unsigned char *input, size_t ilen );
+
+/**
+ * \brief          Create own private value X and export G^X
+ *
+ * \param ctx      DHM context
+ * \param x_size   private value size in bytes
+ * \param output   destination buffer
+ * \param olen     must be at least equal to the size of P, ctx->len
+ * \param f_rng    RNG function
+ * \param p_rng    RNG parameter
+ *
+ * \return         0 if successful, or an MBEDTLS_ERR_DHM_XXX error code
+ */
+int mbedtls_dhm_make_public( mbedtls_dhm_context *ctx, int x_size,
+                     unsigned char *output, size_t olen,
+                     int (*f_rng)(void *, unsigned char *, size_t),
+                     void *p_rng );
+
+/**
+ * \brief          Derive and export the shared secret (G^Y)^X mod P
+ *
+ * \param ctx      DHM context
+ * \param output   destination buffer
+ * \param output_size   size of the destination buffer
+ * \param olen     on exit, holds the actual number of bytes written
+ * \param f_rng    RNG function, for blinding purposes
+ * \param p_rng    RNG parameter
+ *
+ * \return         0 if successful, or an MBEDTLS_ERR_DHM_XXX error code
+ *
+ * \note           If non-NULL, f_rng is used to blind the input as
+ *                 countermeasure against timing attacks. Blinding is
+ *                 automatically used if and only if our secret value X is
+ *                 re-used and costs nothing otherwise, so it is recommended
+ *                 to always pass a non-NULL f_rng argument.
+ */
+int mbedtls_dhm_calc_secret( mbedtls_dhm_context *ctx,
+                     unsigned char *output, size_t output_size, size_t *olen,
+                     int (*f_rng)(void *, unsigned char *, size_t),
+                     void *p_rng );
+
+/**
+ * \brief          Free and clear the components of a DHM key
+ *
+ * \param ctx      DHM context to free and clear
+ */
+void mbedtls_dhm_free( mbedtls_dhm_context *ctx );
+
+#if defined(MBEDTLS_ASN1_PARSE_C)
+/** \ingroup x509_module */
+/**
+ * \brief          Parse DHM parameters in PEM or DER format
+ *
+ * \param dhm      DHM context to be initialized
+ * \param dhmin    input buffer
+ * \param dhminlen size of the buffer
+ *                 (including the terminating null byte for PEM data)
+ *
+ * \return         0 if successful, or a specific DHM or PEM error code
+ */
+int mbedtls_dhm_parse_dhm( mbedtls_dhm_context *dhm, const unsigned char *dhmin,
+                   size_t dhminlen );
+
+#if defined(MBEDTLS_FS_IO)
+/** \ingroup x509_module */
+/**
+ * \brief          Load and parse DHM parameters
+ *
+ * \param dhm      DHM context to be initialized
+ * \param path     filename to read the DHM Parameters from
+ *
+ * \return         0 if successful, or a specific DHM or PEM error code
+ */
+int mbedtls_dhm_parse_dhmfile( mbedtls_dhm_context *dhm, const char *path );
+#endif /* MBEDTLS_FS_IO */
+#endif /* MBEDTLS_ASN1_PARSE_C */
+
+/**
+ * \brief          Checkup routine
+ *
+ * \return         0 if successful, or 1 if the test failed
+ */
+int mbedtls_dhm_self_test( int verbose );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* dhm.h */

+ 30 - 0
Pal/lib/crypto/mbedtls/mbedtls/platform.h

@@ -0,0 +1,30 @@
+/* Copyright (C) 2017 Fortanix, Inc.
+
+   This file is part of Graphene Library OS.
+
+   Graphene Library OS is free software: you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation, either version 3 of the
+   License, or (at your option) any later version.
+
+   Graphene Library OS is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef MBEDTLS_PLATFORM_H
+#define MBEDTLS_PLATFORM_H
+
+#include "api.h"
+
+void * malloc(size_t size);
+void * calloc (size_t nmem, size_t size);
+void free(void *);
+
+#define mbedtls_calloc calloc
+#define mbedtls_free free
+
+#endif

+ 3 - 3
Pal/lib/crypto/dh.c → Pal/lib/crypto/wolfssl/dh.c

@@ -25,9 +25,9 @@
 #include <stddef.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <stdint.h>
 
 
-#include "integer.h"
-#include "dh.h"
-#include "error-crypt.h"
+#include "crypto/integer.h"
+#include "crypto/wolfssl/dh.h"
+#include "crypto/error-crypt.h"
 
 
 /*
 /*
  * source
  * source

+ 1 - 1
Pal/lib/crypto/dh.h → Pal/lib/crypto/wolfssl/dh.h

@@ -36,7 +36,7 @@ typedef uint8_t byte;
 
 
 #define BIT_SIZE  8
 #define BIT_SIZE  8
 
 
-#include "integer.h"
+#include "crypto/integer.h"
 
 
 /* Diffie-Hellman Key */
 /* Diffie-Hellman Key */
 typedef struct DhKey {
 typedef struct DhKey {

+ 8 - 11
Pal/lib/crypto/wolfssl/sha256.c

@@ -22,7 +22,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
  */
  */
 
 
-#include "sha256.h"
+#include "crypto/wolfssl/sha256.h"
 #include "api.h"
 #include "api.h"
 
 
 #define XMEMSET memset
 #define XMEMSET memset
@@ -242,15 +242,12 @@ int SHA256Hash(const byte * data, word32 len, byte * hash)
     int ret = 0;
     int ret = 0;
     SHA256 sha256;
     SHA256 sha256;
 
 
-    if ((ret = SHA256Init(&sha256)) != 0) {
-        printf("SHA256Init failed");
-    }
-    else if ((ret = SHA256Update(&sha256, data, len)) != 0) {
-        printf("SHA256Update failed");
-    }
-    else if ((ret = SHA256Final(&sha256, hash)) != 0) {
-        printf("SHA256Final failed");
-    }
+    if ((ret = SHA256Init(&sha256)) != 0)
+        return ret;
+    if ((ret = SHA256Update(&sha256, data, len)) != 0)
+        return ret;
+    else if ((ret = SHA256Final(&sha256, hash)) != 0)
+        return ret;
 
 
-    return ret;
+    return 0;
 }
 }

+ 33 - 18
Pal/lib/pal_crypto.h

@@ -24,41 +24,56 @@
 #ifndef PAL_CRYPTO_H
 #ifndef PAL_CRYPTO_H
 #define PAL_CRYPTO_H
 #define PAL_CRYPTO_H
 
 
-/*
- * You can change which crypto library will be used by changing this
- * define. Currently supported options:
- * - wolfSSL
- */
-#define PAL_CRYPTO_PROVIDER PAL_CRYPTO_MBEDTLS
-
 /* These cryptosystems are still unconditionally provided by WolfSSL. */
 /* These cryptosystems are still unconditionally provided by WolfSSL. */
 #include "crypto/cmac.h"
 #include "crypto/cmac.h"
-#include "crypto/dh.h"
 #include "crypto/rsa.h"
 #include "crypto/rsa.h"
 
 
-#define PAL_CRYPTO_WOLFSSL 1
-#define PAL_CRYPTO_MBEDTLS 2
-
 #define SHA256_DIGEST_LEN 32
 #define SHA256_DIGEST_LEN 32
 
 
-#if PAL_CRYPTO_PROVIDER == PAL_CRYPTO_WOLFSSL
+#ifdef CRYPTO_USE_WOLFSSL
+#define CRYPTO_PROVIDER_SPECIFIED
+
 #include "crypto/wolfssl/sha256.h"
 #include "crypto/wolfssl/sha256.h"
+#include "crypto/wolfssl/dh.h"
 typedef SHA256 LIB_SHA256_CONTEXT;
 typedef SHA256 LIB_SHA256_CONTEXT;
 
 
-#elif PAL_CRYPTO_PROVIDER == PAL_CRYPTO_MBEDTLS
+#define DH_SIZE 128
+
+typedef struct {
+    uint8_t priv[DH_SIZE];
+    uint32_t priv_size;
+    DhKey key;
+} LIB_DH_CONTEXT __attribute__((aligned(DH_SIZE)));
+#endif /* CRYPTO_USE_WOLFSSL */
+
+#ifdef CRYPTO_USE_MBEDTLS
+#define CRYPTO_PROVIDER_SPECIFIED
+
 #include "crypto/mbedtls/mbedtls/sha256.h"
 #include "crypto/mbedtls/mbedtls/sha256.h"
 typedef mbedtls_sha256_context LIB_SHA256_CONTEXT;
 typedef mbedtls_sha256_context LIB_SHA256_CONTEXT;
 
 
-#else
-# error "Unknown crypto provider. Set PAL_CRYPTO_PROVIDER in pal_crypto.h"
-#endif
+/* DH_SIZE is tied to the choice of parameters in mbedtls_dh.c. */
+#define DH_SIZE 256
+#include "crypto/mbedtls/mbedtls/dhm.h"
+typedef mbedtls_dhm_context LIB_DH_CONTEXT;
+#endif /* CRYPTO_USE_MBEDTLS */
 
 
-typedef LIB_SHA256_CONTEXT PAL_SHA256_CONTEXT;
+#ifndef CRYPTO_PROVIDER_SPECIFIED
+# error "Unknown crypto provider. Set CRYPTO_PROVIDER in Makefile"
+#endif
 
 
+/* SHA256 */
 int lib_SHA256Init(LIB_SHA256_CONTEXT *context);
 int lib_SHA256Init(LIB_SHA256_CONTEXT *context);
 int lib_SHA256Update(LIB_SHA256_CONTEXT *context, const uint8_t *data,
 int lib_SHA256Update(LIB_SHA256_CONTEXT *context, const uint8_t *data,
 		     uint64_t len);
 		     uint64_t len);
 int lib_SHA256Final(LIB_SHA256_CONTEXT *context, uint8_t *output);
 int lib_SHA256Final(LIB_SHA256_CONTEXT *context, uint8_t *output);
-                  
+
+/* Diffie-Hellman Key Exchange */
+int lib_DhInit(LIB_DH_CONTEXT *context);
+int lib_DhCreatePublic(LIB_DH_CONTEXT *context, uint8_t *public,
+                       uint64_t *public_size);
+int lib_DhCalcSecret(LIB_DH_CONTEXT *context, uint8_t *peer, uint64_t peer_size,
+                     uint8_t *secret, uint64_t *secret_size);
+void lib_DhFinal(LIB_DH_CONTEXT *context);
 
 
 #endif
 #endif

+ 3 - 0
Pal/src/host/Linux-SGX/Makefile.am

@@ -16,6 +16,9 @@ LDFLAGS	= -shared -nostdlib -z combreloc -z defs \
 	  --hash-style=gnu -e enclave_entry
 	  --hash-style=gnu -e enclave_entry
 ARFLAGS	=
 ARFLAGS	=
 
 
+CRYPTO_PROVIDER = mbedtls
+CFLAGS += -DCRYPTO_USE_MBEDTLS
+
 pal_loader = $(HOST_DIR)/pal-sgx
 pal_loader = $(HOST_DIR)/pal-sgx
 pal_sec =
 pal_sec =
 pal_lib = libpal.so
 pal_lib = libpal.so

+ 35 - 72
Pal/src/host/Linux-SGX/enclave_framework.c

@@ -205,7 +205,7 @@ int load_trusted_file (PAL_HANDLE file, sgx_stub_t ** stubptr,
 
 
     sgx_stub_t * s = stubs;
     sgx_stub_t * s = stubs;
     uint64_t offset = 0;
     uint64_t offset = 0;
-    PAL_SHA256_CONTEXT sha;
+    LIB_SHA256_CONTEXT sha;
     void * umem;
     void * umem;
 
 
     ret = lib_SHA256Init(&sha);
     ret = lib_SHA256Init(&sha);
@@ -537,56 +537,7 @@ int init_trusted_children (void)
     return 0;
     return 0;
 }
 }
 
 
-#include "crypto/dh.h"
-
-static struct {
-    uint8_t p[128], q[20], g[128];
-} dh_param = {
-    {
-        0xfd, 0x7f, 0x53, 0x81, 0x1d, 0x75, 0x12, 0x29,
-        0x52, 0xdf, 0x4a, 0x9c, 0x2e, 0xec, 0xe4, 0xe7,
-        0xf6, 0x11, 0xb7, 0x52, 0x3c, 0xef, 0x44, 0x00,
-        0xc3, 0x1e, 0x3f, 0x80, 0xb6, 0x51, 0x26, 0x69,
-        0x45, 0x5d, 0x40, 0x22, 0x51, 0xfb, 0x59, 0x3d,
-        0x8d, 0x58, 0xfa, 0xbf, 0xc5, 0xf5, 0xba, 0x30,
-        0xf6, 0xcb, 0x9b, 0x55, 0x6c, 0xd7, 0x81, 0x3b,
-        0x80, 0x1d, 0x34, 0x6f, 0xf2, 0x66, 0x60, 0xb7,
-        0x6b, 0x99, 0x50, 0xa5, 0xa4, 0x9f, 0x9f, 0xe8,
-        0x04, 0x7b, 0x10, 0x22, 0xc2, 0x4f, 0xbb, 0xa9,
-        0xd7, 0xfe, 0xb7, 0xc6, 0x1b, 0xf8, 0x3b, 0x57,
-        0xe7, 0xc6, 0xa8, 0xa6, 0x15, 0x0f, 0x04, 0xfb,
-        0x83, 0xf6, 0xd3, 0xc5, 0x1e, 0xc3, 0x02, 0x35,
-        0x54, 0x13, 0x5a, 0x16, 0x91, 0x32, 0xf6, 0x75,
-        0xf3, 0xae, 0x2b, 0x61, 0xd7, 0x2a, 0xef, 0xf2,
-        0x22, 0x03, 0x19, 0x9d, 0xd1, 0x48, 0x01, 0xc7,
-    },
-
-    {
-        0x97, 0x60, 0x50, 0x8f, 0x15, 0x23, 0x0b, 0xcc,
-        0xb2, 0x92, 0xb9, 0x82, 0xa2, 0xeb, 0x84, 0x0b,
-        0xf0, 0x58, 0x1c, 0xf5,
-    },
-
-    {
-        0xf7, 0xe1, 0xa0, 0x85, 0xd6, 0x9b, 0x3d, 0xde,
-        0xcb, 0xbc, 0xab, 0x5c, 0x36, 0xb8, 0x57, 0xb9,
-        0x79, 0x94, 0xaf, 0xbb, 0xfa, 0x3a, 0xea, 0x82,
-        0xf9, 0x57, 0x4c, 0x0b, 0x3d, 0x07, 0x82, 0x67,
-        0x51, 0x59, 0x57, 0x8e, 0xba, 0xd4, 0x59, 0x4f,
-        0xe6, 0x71, 0x07, 0x10, 0x81, 0x80, 0xb4, 0x49,
-        0x16, 0x71, 0x23, 0xe8, 0x4c, 0x28, 0x16, 0x13,
-        0xb7, 0xcf, 0x09, 0x32, 0x8c, 0xc8, 0xa6, 0xe1,
-        0x3c, 0x16, 0x7a, 0x8b, 0x54, 0x7c, 0x8d, 0x28,
-        0xe0, 0xa3, 0xae, 0x1e, 0x2b, 0xb3, 0xa6, 0x75,
-        0x91, 0x6e, 0xa3, 0x7f, 0x0b, 0xfa, 0x21, 0x35,
-        0x62, 0xf1, 0xfb, 0x62, 0x7a, 0x01, 0x24, 0x3b,
-        0xcc, 0xa4, 0xf1, 0xbe, 0xa8, 0x51, 0x90, 0x89,
-        0xa8, 0x83, 0xdf, 0xe1, 0x5a, 0xe5, 0x9f, 0x06,
-        0x92, 0x8b, 0x66, 0x5e, 0x80, 0x7b, 0x55, 0x25,
-        0x64, 0x01, 0x4c, 0x3b, 0xfe, 0xcf, 0x49, 0x2a,
-    },
-};
-
+#if 0
 void test_dh (void)
 void test_dh (void)
 {
 {
     int ret;
     int ret;
@@ -642,6 +593,7 @@ void test_dh (void)
     SGX_DBG(DBG_S, "key exchange(side B): %s (%d)\n", __hex2str(agree2, agreesz2),
     SGX_DBG(DBG_S, "key exchange(side B): %s (%d)\n", __hex2str(agree2, agreesz2),
            agreesz2);
            agreesz2);
 }
 }
+#endif
 
 
 #include "crypto/rsa.h"
 #include "crypto/rsa.h"
 
 
@@ -669,7 +621,7 @@ int init_enclave (void)
         goto out_free;
         goto out_free;
     }
     }
 
 
-    PAL_SHA256_CONTEXT sha256;
+    LIB_SHA256_CONTEXT sha256;
 
 
     ret = lib_SHA256Init(&sha256);
     ret = lib_SHA256Init(&sha256);
     if (ret < 0)
     if (ret < 0)
@@ -699,46 +651,56 @@ out_free:
 int _DkStreamKeyExchange (PAL_HANDLE stream, PAL_SESSION_KEY * keyptr)
 int _DkStreamKeyExchange (PAL_HANDLE stream, PAL_SESSION_KEY * keyptr)
 {
 {
     unsigned char session_key[32] __attribute__((aligned(32)));
     unsigned char session_key[32] __attribute__((aligned(32)));
-    unsigned char priv[128]  __attribute__((aligned(128))),
-                  pub[128]   __attribute__((aligned(128))),
-                  agree[128] __attribute__((aligned(128)));
-    uint32_t privsz, pubsz, agreesz;
-    DhKey dh;
+    uint8_t pub[DH_SIZE]   __attribute__((aligned(DH_SIZE)));
+    uint8_t agree[DH_SIZE] __attribute__((aligned(DH_SIZE)));
+    PAL_NUM pubsz, agreesz;
+    LIB_DH_CONTEXT context;
     int ret;
     int ret;
 
 
-    InitDhKey(&dh);
-
-    ret = DhSetKey(&dh, dh_param.p, sizeof(dh_param.p), dh_param.g,
-                   sizeof(dh_param.g));
+    ret = lib_DhInit(&context);
     if (ret < 0) {
     if (ret < 0) {
-        SGX_DBG(DBG_S, "Key Exchange: DhSetKey failed: %d\n", ret);
-        goto out;
+        SGX_DBG(DBG_S, "Key Exchange: DH Init failed: %d\n", ret);
+        goto out_no_final;
     }
     }
 
 
-    ret = DhGenerateKeyPair(&dh, priv, &privsz, pub, &pubsz);
+    pubsz = sizeof pub;
+    ret = lib_DhCreatePublic(&context, pub, &pubsz);
     if (ret < 0) {
     if (ret < 0) {
-        SGX_DBG(DBG_S, "Key Exchange: DhGenerateKeyPair failed: %d\n", ret);
+        SGX_DBG(DBG_S, "Key Exchange: DH CreatePublic failed: %d\n", ret);
         goto out;
         goto out;
     }
     }
 
 
-    ret = _DkStreamWrite(stream, 0, pubsz, pub, NULL, 0);
-    if (ret < pubsz) {
+    assert(pubsz > 0 && pubsz <= DH_SIZE);
+    if (pubsz < DH_SIZE) {
+        /* Insert leading zero bytes if necessary. These values are big-
+         * endian, so we either need to know the length of the bignum or
+         * zero-pad at the beginning instead of the end. This code chooses
+         * to do the latter. */
+        memmove(pub + (DH_SIZE - pubsz), pub, pubsz);
+        memset(pub, 0, DH_SIZE - pubsz);
+    }
+
+    ret = _DkStreamWrite(stream, 0, DH_SIZE, pub, NULL, 0);
+    if (ret != DH_SIZE) {
         SGX_DBG(DBG_S, "Key Exchange: DkStreamWrite failed: %d\n", ret);
         SGX_DBG(DBG_S, "Key Exchange: DkStreamWrite failed: %d\n", ret);
         goto out;
         goto out;
     }
     }
 
 
-    ret = _DkStreamRead(stream, 0, pubsz, pub, NULL, 0);
-    if (ret < pubsz) {
+    ret = _DkStreamRead(stream, 0, DH_SIZE, pub, NULL, 0);
+    if (ret != DH_SIZE) {
         SGX_DBG(DBG_S, "Key Exchange: DkStreamRead failed: %d\n", ret);
         SGX_DBG(DBG_S, "Key Exchange: DkStreamRead failed: %d\n", ret);
         goto out;
         goto out;
     }
     }
 
 
-    ret = DhAgree(&dh, agree, &agreesz, priv, privsz, pub, pubsz);
+    agreesz = sizeof agree;
+    ret = lib_DhCalcSecret(&context, pub, DH_SIZE, agree, &agreesz);
     if (ret < 0) {
     if (ret < 0) {
-        SGX_DBG(DBG_S, "Key Exchange: DhAgree failed: %d\n", ret);
+        SGX_DBG(DBG_S, "Key Exchange: DH CalcSecret failed: %d\n", ret);
         goto out;
         goto out;
     }
     }
 
 
+    assert(agreesz > 0 && agreesz <= sizeof agree);
+    // TODO(security): use a real KDF
     memset(session_key, 0, sizeof(session_key));
     memset(session_key, 0, sizeof(session_key));
     for (int i = 0 ; i < agreesz ; i++)
     for (int i = 0 ; i < agreesz ; i++)
         session_key[i % sizeof(session_key)] ^= agree[i];
         session_key[i % sizeof(session_key)] ^= agree[i];
@@ -750,7 +712,8 @@ int _DkStreamKeyExchange (PAL_HANDLE stream, PAL_SESSION_KEY * keyptr)
 
 
     ret = 0;
     ret = 0;
 out:
 out:
-    FreeDhKey(&dh);
+    lib_DhFinal(&context);
+out_no_final:
     return ret;
     return ret;
 }
 }