dev-libs/nss/files/nss-3.73-CVE-2023-5388.patch - third_party/overlays/chromiumos-overlay - Git at Google

 --- a/lib/freebl/mpi/mpi.c
 +++ b/lib/freebl/mpi/mpi.c
 @@ -13,6 +13,8 @@
  #include <c_asm.h>
  #endif

 +#include <assert.h>
 +
  #if defined(__arm__) && \
      ((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__))
  /* 16-bit thumb or ARM v3 doesn't work inlined assember version */
 @@ -817,15 +819,18 @@

  /* }}} */

 -/* {{{ mp_mul(a, b, c) */
 +/* {{{ s_mp_mulg(a, b, c) */

  /*
 -  mp_mul(a, b, c)
 +  s_mp_mulg(a, b, c)

 -  Compute c = a * b.  All parameters may be identical.
 +  Compute c = a * b.  All parameters may be identical. If constantTime is set,
 +  then the operations are done in constant time. The original is mostly
 +  constant time as long as s_mpv_mul_d_add() is constant time. This is true
 +  of the x86 assembler, as well as the current c code.
   */
  mp_err
 -mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
 +s_mp_mulg(const mp_int *a, const mp_int *b, mp_int *c, int constantTime)
  {
      mp_digit *pb;
      mp_int tmp;
 @@ -861,7 +866,14 @@
          goto CLEANUP;

  #ifdef NSS_USE_COMBA
 -    if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
 +    /* comba isn't constant time because it clamps! If we cared
 +     * (we needed a constant time version of multiply that was 'faster')
 +     * we could easily pass constantTime down to the comba code and
 +     * get it to skip the clamp... but there are assembler versions
 +     * which add comba to platforms that can't compile the normal
 +     * comba's embedded assembler which would also need to change, so
 +     * for now we just skip comba when we are running constant time. */
 +    if (!constantTime && (MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
          if (MP_USED(a) == 4) {
              s_mp_mul_comba_4(a, b, c);
              goto CLEANUP;
 @@ -891,13 +903,15 @@
          mp_digit b_i = *pb++;

          /* Inner product:  Digits of a */
 -        if (b_i)
 +        if (constantTime || b_i)
              s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
          else
              MP_DIGIT(c, ib + useda) = b_i;
      }

 -    s_mp_clamp(c);
 +    if (!constantTime) {
 +        s_mp_clamp(c);
 +    }

      if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
          SIGN(c) = ZPOS;
 @@ -907,10 +921,54 @@
  CLEANUP:
      mp_clear(&tmp);
      return res;
 +} /* end s_mp_mulg() */
 +
 +/* }}} */
 +
 +/* {{{ mp_mul(a, b, c) */
 +
 +/*
 +  mp_mul(a, b, c)
 +
 +  Compute c = a * b.  All parameters may be identical.
 + */
 +
 +mp_err
 +mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
 +{
 +    return s_mp_mulg(a, b, c, 0);
  } /* end mp_mul() */

  /* }}} */

 +/* {{{ mp_mulCT(a, b, c, setSize) */
 +
 +/*
 +  mp_mulCT(a, b, c, setSize)
 +
 +  Compute c = a * b in constant time. Parameters may not be identical.
 +  NOTE: a and b may be modified (both are passed to s_mp_pad with setSize).
 + */
 +
 +mp_err
 +mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize)
 +{
 +    mp_err res;
 +
 +    /* Make the multiply values fixed length so the multiply
 +     * doesn't leak the length. At this point all the
 +     * values are blinded, but once we finish we want the
 +     * output size to be hidden (so no clamping the output). */
 +    MP_CHECKOK(s_mp_pad(a, setSize));
 +    MP_CHECKOK(s_mp_pad(b, setSize));
 +    MP_CHECKOK(s_mp_pad(c, 2 * setSize));
 +    MP_CHECKOK(s_mp_mulg(a, b, c, 1));
 +CLEANUP:
 +    return res;
 +} /* end mp_mulCT() */
 +
 +/* }}} */
 +
  /* {{{ mp_sqr(a, sqr) */

  #if MP_SQUARE
 @@ -1283,6 +1341,138 @@

  /* }}} */

 +/* {{{ s_mp_subCT_d(a, b, borrow, ret) */
 +
 +/*
 +  s_mp_subCT_d(a, b, borrow, ret)
 +
 +  Compute *ret = a - b - borrow in constant time. Returns the borrow out.
 + */
 +mp_digit
 +s_mp_subCT_d(mp_digit a, mp_digit b, mp_digit borrow, mp_digit *ret)
 +{
 +    *ret = a - b - borrow;
 +    return MP_CT_LTU(a, *ret) | (MP_CT_EQ(a, *ret) & borrow);
 +} /* end s_mp_subCT_d() */
 +
 +/* }}} */
 +
 +/* {{{ mp_subCT(a, b, ret, borrow) */
 +
 +/* Compute ret = a - b over MP_USED(a) digits and store the final borrow in
 + * *borrow. Done in constant time. b may be modified (it is passed to s_mp_pad).
 + */
 +mp_err
 +mp_subCT(const mp_int *a, mp_int *b, mp_int *ret, mp_digit *borrow)
 +{
 +    mp_size used_a = MP_USED(a);
 +    mp_size i;
 +    mp_err res;
 +
 +    MP_CHECKOK(s_mp_pad(b, used_a));
 +    MP_CHECKOK(s_mp_pad(ret, used_a));
 +    *borrow = 0;
 +    for (i = 0; i < used_a; i++) {
 +        *borrow = s_mp_subCT_d(MP_DIGIT(a, i), MP_DIGIT(b, i), *borrow,
 +                               &MP_DIGIT(ret, i));
 +    }
 +
 +    res = MP_OKAY;
 +CLEANUP:
 +    return res;
 +} /*  end mp_subCT() */
 +
 +/* }}} */
 +
 +/* {{{ mp_selectCT(cond, a, b, ret) */
 +
 +/*
 + * ret = cond ? a : b; cond must be 0 or 1. MP_BADARG if MP_USED(a) != MP_USED(b)
 + */
 +mp_err
 +mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret)
 +{
 +    mp_size used_a = MP_USED(a);
 +    mp_err res;
 +    mp_size i;
 +
 +    cond *= MP_DIGIT_MAX; /* turn cond into a mask: 0 -> 0, 1 -> MP_DIGIT_MAX */
 +
 +    /* We currently require these to be equal on input.
 +     * We could use pad to extend one of them, but that might
 +     * leak data as it wouldn't be constant time. */
 +    if (used_a != MP_USED(b)) {
 +        return MP_BADARG;
 +    }
 +
 +    MP_CHECKOK(s_mp_pad(ret, used_a));
 +    for (i = 0; i < used_a; i++) {
 +        MP_DIGIT(ret, i) = MP_CT_SEL_DIGIT(cond, MP_DIGIT(a, i), MP_DIGIT(b, i));
 +    }
 +    res = MP_OKAY;
 +CLEANUP:
 +    return res;
 +} /* end mp_selectCT() */
 +
 +/* {{{ mp_reduceCT(a, m, n0i, c) */
 +
 +/*
 +  mp_reduceCT(a, m, n0i, c)
 +
 +  Compute c = aR^-1 (mod m) in constant time.
 +   Input should be in montgomery form. If input is the
 +   result of a montgomery multiply then output will be
 +   in montgomery form.
 +   Result will be reduced to MP_USED(m), but not be
 +   clamped.
 + */
 +
 +mp_err
 +mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *c)
 +{
 +    mp_size used_m = MP_USED(m);
 +    mp_size used_c = used_m * 2 + 1;
 +    mp_digit *m_digits, *c_digits;
 +    mp_size i;
 +    mp_digit borrow, carry;
 +    mp_err res;
 +    mp_int sub;
 +
 +    MP_DIGITS(&sub) = 0;
 +    MP_CHECKOK(mp_init_size(&sub, used_m));
 +
 +    if (a != c) {
 +        MP_CHECKOK(mp_copy(a, c));
 +    }
 +    MP_CHECKOK(s_mp_pad(c, used_c));
 +    m_digits = MP_DIGITS(m);
 +    c_digits = MP_DIGITS(c);
 +    for (i = 0; i < used_m; i++) {
 +        mp_digit m_i = MP_DIGIT(c, i) * n0i;
 +        s_mpv_mul_d_add_propCT(m_digits, used_m, m_i, c_digits++, used_c--);
 +    }
 +    s_mp_rshd(c, used_m);
 +    /* MP_USED(c) should be used_m+1 with the high word being any carry
 +     * from the previous multiply, save that carry and drop the high
 +     * word for the subtraction below */
 +    carry = MP_DIGIT(c, used_m);
 +    MP_DIGIT(c, used_m) = 0;
 +    MP_USED(c) = used_m;
 +    /* mp_subCT wants c and m to be the same size, we've already
 +     * guaranteed that in the previous statement, so mp_subCT won't actually
 +     * modify m, so it's safe to recast */
 +    MP_CHECKOK(mp_subCT(c, (mp_int *)m, &sub, &borrow));
 +
 +    /* we return c-m if c >= m (no borrow), or if there was a borrow and a carry */
 +    MP_CHECKOK(mp_selectCT(borrow ^ carry, c, &sub, c));
 +    res = MP_OKAY;
 +CLEANUP:
 +    mp_clear(&sub);
 +    return res;
 +} /* end mp_reduceCT() */
 +
 +/* }}} */
 +
  /* {{{ mp_mod_d(a, d, c) */

  /*
 @@ -1399,6 +1589,37 @@

  /* }}} */

 +/* {{{ mp_mulmontmodCT(a, b, m, n0i, c) */
 +
 +/*
 +  mp_mulmontmodCT(a, b, m, n0i, c)
 +
 +  Compute c = (a * b) mod m in constant time wrt a and b. Either a or b
 +  should be in montgomery form and the output is native. If both a and b
 +  are in montgomery form, then the output will also be in montgomery form
 +  and can be recovered with an mp_reduceCT call.
 +  NOTE: a and b may be modified.
 + */
 +
 +mp_err
 +mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i,
 +                mp_int *c)
 +{
 +    mp_err res;
 +
 +    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
 +
 +    if ((res = mp_mulCT(a, b, c, MP_USED(m))) != MP_OKAY)
 +        return res;
 +
 +    if ((res = mp_reduceCT(c, m, n0i, c)) != MP_OKAY)
 +        return res;
 +
 +    return MP_OKAY;
 +}
 +
 +/* }}} */
 +
  /* {{{ mp_sqrmod(a, m, c) */

  #if MP_SQUARE
 @@ -3942,14 +4163,62 @@
          a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
          a1b0 += a0b1;                                              \
          Phi += a1b0 >> MP_HALF_DIGIT_BIT;                          \
 -        if (a1b0 < a0b1)                                           \
 -            Phi += MP_HALF_RADIX;                                  \
 +        Phi += (MP_CT_LTU(a1b0, a0b1)) << MP_HALF_DIGIT_BIT;       \
          a1b0 <<= MP_HALF_DIGIT_BIT;                                \
          Plo += a1b0;                                               \
 -        if (Plo < a1b0)                                            \
 -            ++Phi;                                                 \
 +        Phi += MP_CT_LTU(Plo, a1b0);                               \
 +    }
 +#endif
 +
 +/* Constant time version of s_mpv_mul_d_add_prop.
 + * Presently, this is only used by the Constant time Montgomery arithmetic code. */
 +/* c += a * b */
 +void
 +s_mpv_mul_d_add_propCT(const mp_digit *a, mp_size a_len, mp_digit b,
 +                       mp_digit *c, mp_size c_len)
 +{
 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
 +    mp_digit d = 0;
 +
 +    c_len -= a_len;
 +    /* Inner product:  Digits of a */
 +    while (a_len--) {
 +        mp_word w = ((mp_word)b * *a++) + *c + d;
 +        *c++ = ACCUM(w);
 +        d = CARRYOUT(w);
 +    }
 +
 +    /* propagate the carry to the end, even if carry is zero */
 +    while (c_len--) {
 +        mp_word w = (mp_word)*c + d;
 +        *c++ = ACCUM(w);
 +        d = CARRYOUT(w);
 +    }
 +#else
 +    mp_digit carry = 0;
 +    c_len -= a_len;
 +    while (a_len--) {
 +        mp_digit a_i = *a++;
 +        mp_digit a0b0, a1b1;
 +        MP_MUL_DxD(a_i, b, a1b1, a0b0);
 +
 +        a0b0 += carry;
 +        a1b1 += MP_CT_LTU(a0b0, carry);
 +        a0b0 += a_i = *c;
 +        a1b1 += MP_CT_LTU(a0b0, a_i);
 +
 +        *c++ = a0b0;
 +        carry = a1b1;
 +    }
 +    /* propagate the carry to the end, even if carry is zero */
 +    while (c_len--) {
 +        mp_digit c_i = *c;
 +        carry += c_i;
 +        *c++ = carry;
 +        carry = MP_CT_LTU(carry, c_i);
      }
  #endif
 +}

  #if !defined(MP_ASSEMBLY_MULTIPLY)
  /* c = a * b */
 @@ -3975,8 +4244,7 @@
          MP_MUL_DxD(a_i, b, a1b1, a0b0);

          a0b0 += carry;
 -        if (a0b0 < carry)
 -            ++a1b1;
 +        a1b1 += MP_CT_LTU(a0b0, carry);
          *c++ = a0b0;
          carry = a1b1;
      }
 @@ -4008,11 +4276,9 @@
          MP_MUL_DxD(a_i, b, a1b1, a0b0);

          a0b0 += carry;
 -        if (a0b0 < carry)
 -            ++a1b1;
 +        a1b1 += MP_CT_LTU(a0b0, carry);
          a0b0 += a_i = *c;
 -        if (a0b0 < a_i)
 -            ++a1b1;
 +        a1b1 += MP_CT_LTU(a0b0, a_i);
          *c++ = a0b0;
          carry = a1b1;
      }
 --- a/lib/freebl/mpi/mpi.h
 +++ b/lib/freebl/mpi/mpi.h
 @@ -150,6 +150,38 @@
  /* This defines the maximum I/O base (minimum is 2)   */
  #define MP_MAX_RADIX 64

 +/* Constant Time Macros on mp_digits */
 +#define MP_CT_HIGH_TO_LOW(x) ((mp_digit)((mp_digit)(x) >> (MP_DIGIT_BIT - 1)))
 +#define MP_CT_TRUE ((mp_digit)1)
 +#define MP_CT_FALSE ((mp_digit)0)
 +
 +/* basic zero and non zero tests */
 +#define MP_CT_NOT_ZERO(x) (MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x)))))
 +#define MP_CT_ZERO(x) (MP_CT_TRUE ^ MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x)))))
 +
 +/* basic constant-time helper macro for equalities and inequalities.
 + * The inequalities will produce incorrect results if
 + * abs(a-b) >= MP_DIGIT_SIZE/2. This can be avoided if unsigned values stay
 + * within the range 0-MP_DIGIT_MAX/2. */
 +#define MP_CT_EQ(a, b) MP_CT_ZERO(((a) ^ (b)))
 +#define MP_CT_NE(a, b) MP_CT_NOT_ZERO(((a) ^ (b)))
 +#define MP_CT_GT(a, b) MP_CT_HIGH_TO_LOW((b) - (a))
 +#define MP_CT_LT(a, b) MP_CT_HIGH_TO_LOW((a) - (b))
 +#define MP_CT_GE(a, b) (MP_CT_TRUE ^ MP_CT_LT(a, b))
 +#define MP_CT_LE(a, b) (MP_CT_TRUE ^ MP_CT_GT(a, b))
 +
 +/* use constant time result to select a boolean value
 + * or an mp digit depending on the args */
 +#define MP_CT_SEL(m, l, r) ((r) ^ ((m) & ((r) ^ (l))))
 +#define MP_CT_SELB(m, l, r) MP_CT_SEL(m, l, r)      /* mask, l and r are booleans */
 +#define MP_CT_SEL_DIGIT(m, l, r) MP_CT_SEL(m, l, r) /* mask, l, and r are mp_digit */
 +
 +/* full inequalities that work with full mp_digit values */
 +#define MP_CT_OVERFLOW(a, b, c, d)           \
 +    MP_CT_SELB(MP_CT_HIGH_TO_LOW((a) ^ (b)), \
 +               (MP_CT_HIGH_TO_LOW(d)), c)
 +#define MP_CT_LTU(a, b) MP_CT_OVERFLOW(a, b, MP_CT_LT(a, b), b)
 +
  typedef struct {
      mp_sign sign;  /* sign of this quantity      */
      mp_size alloc; /* how many digits allocated  */
 @@ -190,7 +222,9 @@
  /* Full arithmetic         */
  mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c);
  mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
 +mp_err mp_subCT(const mp_int *a, mp_int *b, mp_int *c, mp_digit *borrow);
  mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c);
 +mp_err mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize);
  #if MP_SQUARE
  mp_err mp_sqr(const mp_int *a, mp_int *b);
  #else
 @@ -217,6 +251,12 @@
  mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c);
  #endif /* MP_MODARITH */

 +/* montgomery math */
 +mp_err mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont);
 +mp_digit mp_calculate_mont_n0i(const mp_int *N);
 +mp_err mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *ct);
 +mp_err mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i, mp_int *c);
 +
  /* Comparisons             */
  int mp_cmp_z(const mp_int *a);
  int mp_cmp_d(const mp_int *a, mp_digit d);
 @@ -224,6 +264,7 @@
  int mp_cmp_mag(const mp_int *a, const mp_int *b);
  int mp_isodd(const mp_int *a);
  int mp_iseven(const mp_int *a);
 +mp_err mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret);

  /* Number theoretic        */
  mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c);
 --- a/lib/freebl/mpi/mpmontg.c
 +++ b/lib/freebl/mpi/mpmontg.c
 @@ -129,20 +129,27 @@
  }
  #endif

 -STATIC
  mp_err
 -s_mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont)
 +mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont)
  {
      mp_err res;

      /* xMont = x * R mod N   where  N is modulus */
 -    MP_CHECKOK(mp_copy(x, xMont));
 -    MP_CHECKOK(s_mp_lshd(xMont, MP_USED(&mmm->N))); /* xMont = x << b */
 -    MP_CHECKOK(mp_div(xMont, &mmm->N, 0, xMont));   /*         mod N */
 +    if (x != xMont) {
 +        MP_CHECKOK(mp_copy(x, xMont));
 +    }
 +    MP_CHECKOK(s_mp_lshd(xMont, MP_USED(N))); /* xMont = x << b */
 +    MP_CHECKOK(mp_div(xMont, N, 0, xMont));   /*         mod N */
  CLEANUP:
      return res;
  }

 +mp_digit
 +mp_calculate_mont_n0i(const mp_int *N)
 +{
 +    return 0 - s_mp_invmod_radix(MP_DIGIT(N, 0));
 +}
 +
  #ifdef MP_USING_MONT_MULF

  /* the floating point multiply is already cache safe,
 @@ -198,7 +205,7 @@
      MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));

      mp_set(&accum1, 1);
 -    MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
 +    MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
      MP_CHECKOK(s_mp_pad(&accum1, nLen));

      oddPowSize = 2 * nLen + 1;
 @@ -478,7 +485,7 @@

      /* set accumulator to montgomery residue of 1 */
      mp_set(&accum1, 1);
 -    MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
 +    MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
      pa1 = &accum1;
      pa2 = &accum2;

 @@ -865,7 +872,7 @@
          MP_CHECKOK(mp_init_size(&accum[2], 3 * nLen + 2));
          MP_CHECKOK(mp_init_size(&accum[3], 3 * nLen + 2));
          mp_set(&accum[0], 1);
 -        MP_CHECKOK(s_mp_to_mont(&accum[0], mmm, &accum[0]));
 +        MP_CHECKOK(mp_to_mont(&accum[0], &(mmm->N), &accum[0]));
          MP_CHECKOK(mp_copy(montBase, &accum[1]));
          SQR(montBase, &accum[2]);
          MUL_NOWEAVE(montBase, &accum[2], &accum[3]);
 @@ -884,7 +891,7 @@
      } else {
          if (first_window == 0) {
              mp_set(&accum1, 1);
 -            MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
 +            MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
          } else {
              /* assert first_window == 1? */
              MP_CHECKOK(mp_copy(montBase, &accum1));
 @@ -1051,9 +1058,9 @@
      /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX
      **        where n0 = least significant mp_digit of N, the modulus.
      */
 -    mmm.n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(modulus, 0));
 +    mmm.n0prime = mp_calculate_mont_n0i(modulus);

 -    MP_CHECKOK(s_mp_to_mont(base, &mmm, &montBase));
 +    MP_CHECKOK(mp_to_mont(base, modulus, &montBase));

      bits_in_exponent = mpl_significant_bits(exponent);
  #ifdef MP_USING_CACHE_SAFE_MOD_EXP
 --- a/lib/freebl/rsa.c
 +++ b/lib/freebl/rsa.c
 @@ -64,6 +64,8 @@
      SECItem modulus;           /* list element "key"                 */
      blindingParams *free, *bp; /* Blinding parameters queue          */
      blindingParams array[RSA_BLINDING_PARAMS_MAX_CACHE_SIZE];
 +    /* precalculate montgomery reduction value */
 +    mp_digit n0i; /* n0i = -(n0 ** -1) mod MP_RADIX, n0 = low digit of n */
  };
  typedef struct RSABlindingParamsStr RSABlindingParams;

 @@ -1146,6 +1148,8 @@
      CHECK_MPI_OK(mp_exptmod(&k, &e, n, f));
      /* g = k**-1 mod n */
      CHECK_MPI_OK(mp_invmod(&k, n, g));
 +    /* convert g to montgomery form */
 +    CHECK_MPI_OK(mp_to_mont(g, n, g));
  cleanup:
      if (kb)
          PORT_ZFree(kb, modLen);
 @@ -1182,13 +1186,16 @@
      rsabp->bp = NULL;
      rsabp->free = bp;

 +    /* precalculate montgomery reduction parameter */
 +    rsabp->n0i = mp_calculate_mont_n0i(n);
 +
      /* List elements are keyed using the modulus */
      return SECITEM_CopyItem(NULL, &rsabp->modulus, &key->modulus);
  }

  static SECStatus
  get_blinding_params(RSAPrivateKey *key, mp_int *n, unsigned int modLen,
 -                    mp_int *f, mp_int *g)
 +                    mp_int *f, mp_int *g, mp_digit *n0i)
  {
      RSABlindingParams *rsabp = NULL;
      blindingParams *bpUnlinked = NULL;
 @@ -1248,6 +1255,7 @@
          /* We've found (or created) the RSAblindingParams struct for this key.
           * Now, search its list of ready blinding params for a usable one.
           */
 +        *n0i = rsabp->n0i;
          while (0 != (bp = rsabp->bp)) {
  #ifndef UNSAFE_FUZZER_MODE
              if (--(bp->counter) > 0)
 @@ -1355,6 +1363,7 @@
      if (err) {
          MP_TO_SEC_ERROR(err);
      }
 +    *n0i = 0;
      return SECFailure;
  }

 @@ -1374,6 +1383,7 @@
      mp_err err;
      mp_int n, c, m;
      mp_int f, g;
 +    mp_digit n0i;
      if (!key || !output || !input) {
          PORT_SetError(SEC_ERROR_INVALID_ARGS);
          return SECFailure;
 @@ -1401,7 +1411,7 @@
      ** blinding factor
      */
      if (nssRSAUseBlinding) {
 -        CHECK_SEC_OK(get_blinding_params(key, &n, modLen, &f, &g));
 +        CHECK_SEC_OK(get_blinding_params(key, &n, modLen, &f, &g, &n0i));
          /* c' = c*f mod n */
          CHECK_MPI_OK(mp_mulmod(&c, &f, &n, &c));
      }
 @@ -1422,7 +1432,7 @@
      */
      if (nssRSAUseBlinding) {
          /* m = m'*g mod n */
 -        CHECK_MPI_OK(mp_mulmod(&m, &g, &n, &m));
 +        CHECK_MPI_OK(mp_mulmontmodCT(&m, &g, &n, n0i, &m));
      }
      err = mp_to_fixlen_octets(&m, output, modLen);
      if (err >= 0)
 --- a/lib/freebl/mpi/mpi-priv.h
 +++ b/lib/freebl/mpi/mpi-priv.h
 @@ -204,6 +204,9 @@
  void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a,
                                         mp_size a_len, mp_digit b,
                                         mp_digit *c);
 +void MPI_ASM_DECL s_mpv_mul_d_add_propCT(const mp_digit *a,
 +                                         mp_size a_len, mp_digit b,
 +                                         mp_digit *c, mp_size c_len);
  void MPI_ASM_DECL s_mpv_sqr_add_prop(const mp_digit *a,
                                       mp_size a_len,
                                       mp_digit *sqrs);
	--- a/lib/freebl/mpi/mpi.c
	+++ b/lib/freebl/mpi/mpi.c
	@@ -13,6 +13,8 @@
	#include <c_asm.h>
	#endif

	+#include <assert.h>
	+
	#if defined(__arm__) && \
	((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__))
	/* 16-bit thumb or ARM v3 doesn't work inlined assember version */
	@@ -817,15 +819,18 @@

	/* }}} */

	-/* {{{ mp_mul(a, b, c) */
	+/* {{{ s_mp_mulg(a, b, c) */

	/*
	- mp_mul(a, b, c)
	+ s_mp_mulg(a, b, c)

	- Compute c = a * b. All parameters may be identical.
	+ Compute c = a * b. All parameters may be identical. if constantTime is set,
	+ then the operations are done in constant time. The original is mostly
	+ constant time as long as s_mpv_mul_d_add() is constant time. This is true
	+ of the x86 assembler, as well as the current c code.
	*/
	mp_err
	-mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
	+s_mp_mulg(const mp_int *a, const mp_int *b, mp_int *c, int constantTime)
	{
	mp_digit *pb;
	mp_int tmp;
	@@ -861,7 +866,14 @@
	goto CLEANUP;

	#ifdef NSS_USE_COMBA
	- if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
	+ /* comba isn't constant time because it clamps! If we cared
	+ * (we needed a constant time version of multiply that was 'faster'
	+ * we could easily pass constantTime down to the comba code and
	+ * get it to skip the clamp... but here are assembler versions
	+ * which add comba to platforms that can't compile the normal
	+ * comba's imbedded assembler which would also need to change, so
	+ * for now we just skip comba when we are running constant time. */
	+ if (!constantTime && (MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
	if (MP_USED(a) == 4) {
	s_mp_mul_comba_4(a, b, c);
	goto CLEANUP;
	@@ -891,13 +903,15 @@
	mp_digit b_i = *pb++;

	/* Inner product: Digits of a */
	- if (b_i)
	+ if (constantTime || b_i)
	s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
	else
	MP_DIGIT(c, ib + useda) = b_i;
	}

	- s_mp_clamp(c);
	+ if (!constantTime) {
	+ s_mp_clamp(c);
	+ }

	if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
	SIGN(c) = ZPOS;
	@@ -907,10 +921,54 @@
	CLEANUP:
	mp_clear(&tmp);
	return res;
	+} /* end smp_mulg() */
	+
	+/* }}} */
	+
	+/* {{{ mp_mul(a, b, c) */
	+
	+/*
	+ mp_mul(a, b, c)
	+
	+ Compute c = a * b. All parameters may be identical.
	+ */
	+
	+mp_err
	+mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
	+{
	+ return s_mp_mulg(a, b, c, 0);
	} /* end mp_mul() */

	/* }}} */

	+/* {{{ mp_mulCT(a, b, c) */
	+
	+/*
	+ mp_mulCT(a, b, c)
	+
	+ Compute c = a * b. In constant time. Parameters may not be identical.
	+ NOTE: a and b may be modified.
	+ */
	+
	+mp_err
	+mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize)
	+{
	+ mp_err res;
	+
	+ /* make the multiply values fixed length so multiply
	+ * doesn't leak the length. at this point all the
	+ * values are blinded, but once we finish we want the
	+ * output size to be hidden (so no clamping the out put) */
	+ MP_CHECKOK(s_mp_pad(a, setSize));
	+ MP_CHECKOK(s_mp_pad(b, setSize));
	+ MP_CHECKOK(s_mp_pad(c, 2 * setSize));
	+ MP_CHECKOK(s_mp_mulg(a, b, c, 1));
	+CLEANUP:
	+ return res;
	+} /* end mp_mulCT() */
	+
	+/* }}} */
	+
	/* {{{ mp_sqr(a, sqr) */

	#if MP_SQUARE
	@@ -1283,6 +1341,138 @@

	/* }}} */

	+/* {{{ s_mp_subCT_d(a, b, borrow, c) */
	+
	+/*
	+ s_mp_subCT_d(a, b, borrow, c)
	+
	+ Compute c = (a -b) - subtract in constant time. returns borrow
	+ */
	+mp_digit
	+s_mp_subCT_d(mp_digit a, mp_digit b, mp_digit borrow, mp_digit *ret)
	+{
	+ *ret = a - b - borrow;
	+ return MP_CT_LTU(a, *ret) | (MP_CT_EQ(a, *ret) & borrow);
	+} /* s_mp_subCT_d() */
	+
	+/* }}} */
	+
	+/* {{{ mp_subCT(a, b, ret, borrow) */
	+
	+/* return ret= a - b and borrow in borrow. done in constant time.
	+ * b could be modified.
	+ */
	+mp_err
	+mp_subCT(const mp_int *a, mp_int *b, mp_int *ret, mp_digit *borrow)
	+{
	+ mp_size used_a = MP_USED(a);
	+ mp_size i;
	+ mp_err res;
	+
	+ MP_CHECKOK(s_mp_pad(b, used_a));
	+ MP_CHECKOK(s_mp_pad(ret, used_a));
	+ *borrow = 0;
	+ for (i = 0; i < used_a; i++) {
	+ *borrow = s_mp_subCT_d(MP_DIGIT(a, i), MP_DIGIT(b, i), *borrow,
	+ &MP_DIGIT(ret, i));
	+ }
	+
	+ res = MP_OKAY;
	+CLEANUP:
	+ return res;
	+} /* end mp_subCT() */
	+
	+/* }}} */
	+
	+/* {{{ mp_selectCT(cond, a, b, ret) */
	+
	+/*
	+ * return ret= cond ? a : b; cond should be either 0 or 1
	+ */
	+mp_err
	+mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret)
	+{
	+ mp_size used_a = MP_USED(a);
	+ mp_err res;
	+ mp_size i;
	+
	+ cond *= MP_DIGIT_MAX;
	+
	+ /* we currently require these to be equal on input,
	+ * we could use pad to extend one of them, but that might
	+ * leak data as it wouldn't be constant time */
	+ if (used_a != MP_USED(b)) {
	+ return MP_BADARG;
	+ }
	+
	+ MP_CHECKOK(s_mp_pad(ret, used_a));
	+ for (i = 0; i < used_a; i++) {
	+ MP_DIGIT(ret, i) = MP_CT_SEL_DIGIT(cond, MP_DIGIT(a, i), MP_DIGIT(b, i));
	+ }
	+ res = MP_OKAY;
	+CLEANUP:
	+ return res;
	+} /* end mp_selectCT() */
	+
	+/* {{{ mp_reduceCT(a, m, c) */
	+
	+/*
	+ mp_reduceCT(a, m, c)
	+
	+ Compute c = aR^-1 (mod m) in constant time.
	+ input should be in montgomery form. If input is the
	+ result of a montgomery multiply then out put will be
	+ in mongomery form.
	+ Result will be reduced to MP_USED(m), but not be
	+ clamped.
	+ */
	+
	+mp_err
	+mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *c)
	+{
	+ mp_size used_m = MP_USED(m);
	+ mp_size used_c = used_m * 2 + 1;
	+ mp_digit *m_digits, *c_digits;
	+ mp_size i;
	+ mp_digit borrow, carry;
	+ mp_err res;
	+ mp_int sub;
	+
	+ MP_DIGITS(&sub) = 0;
	+ MP_CHECKOK(mp_init_size(&sub, used_m));
	+
	+ if (a != c) {
	+ MP_CHECKOK(mp_copy(a, c));
	+ }
	+ MP_CHECKOK(s_mp_pad(c, used_c));
	+ m_digits = MP_DIGITS(m);
	+ c_digits = MP_DIGITS(c);
	+ for (i = 0; i < used_m; i++) {
	+ mp_digit m_i = MP_DIGIT(c, i) * n0i;
	+ s_mpv_mul_d_add_propCT(m_digits, used_m, m_i, c_digits++, used_c--);
	+ }
	+ s_mp_rshd(c, used_m);
	+ /* MP_USED(c) should be used_m+1 with the high word being any carry
	+ * from the previous multiply, save that carry and drop the high
	+ * word for the substraction below */
	+ carry = MP_DIGIT(c, used_m);
	+ MP_DIGIT(c, used_m) = 0;
	+ MP_USED(c) = used_m;
	+ /* mp_subCT wants c and m to be the same size, we've already
	+ * guarrenteed that in the previous statement, so mp_subCT won't actually
	+ * modify m, so it's safe to recast */
	+ MP_CHECKOK(mp_subCT(c, (mp_int *)m, &sub, &borrow));
	+
	+ /* we return c-m if c >= m no borrow or there was a borrow and a carry */
	+ MP_CHECKOK(mp_selectCT(borrow ^ carry, c, &sub, c));
	+ res = MP_OKAY;
	+CLEANUP:
	+ mp_clear(&sub);
	+ return res;
	+} /* end mp_reduceCT() */
	+
	+/* }}} */
	+
	/* {{{ mp_mod_d(a, d, c) */

	/*
	@@ -1399,6 +1589,37 @@

	/* }}} */

	+/* {{{ mp_mulmontmodCT(a, b, m, c) */
	+
	+/*
	+ mp_mulmontmodCT(a, b, m, c)
	+
	+ Compute c = (a * b) mod m in constant time wrt a and b. either a or b
	+ should be in montgomery form and the output is native. If both a and b
	+ are in montgomery form, then the output will also be in montgomery form
	+ and can be recovered with an mp_reduceCT call.
	+ NOTE: a and b may be modified.
	+ */
	+
	+mp_err
	+mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i,
	+ mp_int *c)
	+{
	+ mp_err res;
	+
	+ ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
	+
	+ if ((res = mp_mulCT(a, b, c, MP_USED(m))) != MP_OKAY)
	+ return res;
	+
	+ if ((res = mp_reduceCT(c, m, n0i, c)) != MP_OKAY)
	+ return res;
	+
	+ return MP_OKAY;
	+}
	+
	+/* }}} */
	+
	/* {{{ mp_sqrmod(a, m, c) */

	#if MP_SQUARE
	@@ -3942,14 +4163,62 @@
	a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
	a1b0 += a0b1; \
	Phi += a1b0 >> MP_HALF_DIGIT_BIT; \
	- if (a1b0 < a0b1) \
	- Phi += MP_HALF_RADIX; \
	+ Phi += (MP_CT_LTU(a1b0, a0b1)) << MP_HALF_DIGIT_BIT; \
	a1b0 <<= MP_HALF_DIGIT_BIT; \
	Plo += a1b0; \
	- if (Plo < a1b0) \
	- ++Phi; \
	+ Phi += MP_CT_LTU(Plo, a1b0); \
	+ }
	+#endif
	+
	+/* Constant time version of s_mpv_mul_d_add_prop.
	+ * Presently, this is only used by the Constant time Montgomery arithmetic code. */
	+/* c += a * b */
	+void
	+s_mpv_mul_d_add_propCT(const mp_digit *a, mp_size a_len, mp_digit b,
	+ mp_digit *c, mp_size c_len)
	+{
	+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
	+ mp_digit d = 0;
	+
	+ c_len -= a_len;
	+ /* Inner product: Digits of a */
	+ while (a_len--) {
	+ mp_word w = ((mp_word)b * *a++) + *c + d;
	+ *c++ = ACCUM(w);
	+ d = CARRYOUT(w);
	+ }
	+
	+ /* propagate the carry to the end, even if carry is zero */
	+ while (c_len--) {
	+ mp_word w = (mp_word)*c + d;
	+ *c++ = ACCUM(w);
	+ d = CARRYOUT(w);
	+ }
	+#else
	+ mp_digit carry = 0;
	+ c_len -= a_len;
	+ while (a_len--) {
	+ mp_digit a_i = *a++;
	+ mp_digit a0b0, a1b1;
	+ MP_MUL_DxD(a_i, b, a1b1, a0b0);
	+
	+ a0b0 += carry;
	+ a1b1 += MP_CT_LTU(a0b0, carry);
	+ a0b0 += a_i = *c;
	+ a1b1 += MP_CT_LTU(a0b0, a_i);
	+
	+ *c++ = a0b0;
	+ carry = a1b1;
	+ }
	+ /* propagate the carry to the end, even if carry is zero */
	+ while (c_len--) {
	+ mp_digit c_i = *c;
	+ carry += c_i;
	+ *c++ = carry;
	+ carry = MP_CT_LTU(carry, c_i);
	}
	#endif
	+}

	#if !defined(MP_ASSEMBLY_MULTIPLY)
	/* c = a * b */
	@@ -3975,8 +4244,7 @@
	MP_MUL_DxD(a_i, b, a1b1, a0b0);

	a0b0 += carry;
	- if (a0b0 < carry)
	- ++a1b1;
	+ a1b1 += MP_CT_LTU(a0b0, carry);
	*c++ = a0b0;
	carry = a1b1;
	}
	@@ -4008,11 +4276,9 @@
	MP_MUL_DxD(a_i, b, a1b1, a0b0);

	a0b0 += carry;
	- if (a0b0 < carry)
	- ++a1b1;
	+ a1b1 += MP_CT_LTU(a0b0, carry);
	a0b0 += a_i = *c;
	- if (a0b0 < a_i)
	- ++a1b1;
	+ a1b1 += MP_CT_LTU(a0b0, a_i);
	*c++ = a0b0;
	carry = a1b1;
	}
	--- a/lib/freebl/mpi/mpi.h
	+++ b/lib/freebl/mpi/mpi.h
	@@ -150,6 +150,38 @@
	/* This defines the maximum I/O base (minimum is 2) */
	#define MP_MAX_RADIX 64

	+/* Constant Time Macros on mp_digits */
	+#define MP_CT_HIGH_TO_LOW(x) ((mp_digit)((mp_digit)(x) >> (MP_DIGIT_BIT - 1)))
	+#define MP_CT_TRUE ((mp_digit)1)
	+#define MP_CT_FALSE ((mp_digit)0)
	+
	+/* basic zero and non zero tests */
	+#define MP_CT_NOT_ZERO(x) (MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x)))))
	+#define MP_CT_ZERO(x) (MP_CT_TRUE ^ MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x)))))
	+
	+/* basic constant-time helper macro for equalities and inequalities.
	+ * The inequalities will produce incorrect results if
	+ * abs(a-b) >= MP_DIGIT_SIZE/2. This can be avoided if unsigned values stay
	+ * within the range 0-MP_DIGIT_MAX/2. */
	+#define MP_CT_EQ(a, b) MP_CT_ZERO(((a) ^ (b)))
	+#define MP_CT_NE(a, b) MP_CT_NOT_ZERO(((a) ^ (b)))
	+#define MP_CT_GT(a, b) MP_CT_HIGH_TO_LOW((b) - (a))
	+#define MP_CT_LT(a, b) MP_CT_HIGH_TO_LOW((a) - (b))
	+#define MP_CT_GE(a, b) (MP_CT_TRUE ^ MP_CT_LT(a, b))
	+#define MP_CT_LE(a, b) (MP_CT_TRUE ^ MP_CT_GT(a, b))
	+
	+/* use constant time result to select a boolean value
	+ * or an mp digit depending on the args */
	+#define MP_CT_SEL(m, l, r) ((r) ^ ((m) & ((r) ^ (l))))
	+#define MP_CT_SELB(m, l, r) MP_CT_SEL(m, l, r) /* mask, l and r are booleans */
	+#define MP_CT_SEL_DIGIT(m, l, r) MP_CT_SEL(m, l, r) /* mask, l, and r are mp_digit */
	+
	+/* full inequalities that work with full mp_digit values */
	+#define MP_CT_OVERFLOW(a, b, c, d) \
	+ MP_CT_SELB(MP_CT_HIGH_TO_LOW((a) ^ (b)), \
	+ (MP_CT_HIGH_TO_LOW(d)), c)
	+#define MP_CT_LTU(a, b) MP_CT_OVERFLOW(a, b, MP_CT_LT(a, b), b)
	+
	typedef struct {
	mp_sign sign; /* sign of this quantity */
	mp_size alloc; /* how many digits allocated */
	@@ -190,7 +222,9 @@
	/* Full arithmetic */
	mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c);
	mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
	+mp_err mp_subCT(const mp_int *a, mp_int *b, mp_int *c, mp_digit *borrow);
	mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c);
	+mp_err mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize);
	#if MP_SQUARE
	mp_err mp_sqr(const mp_int a, mp_int b);
	#else
	@@ -217,6 +251,12 @@
	mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c);
	#endif /* MP_MODARITH */

	+/* montgomery math */
	+mp_err mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont);
	+mp_digit mp_calculate_mont_n0i(const mp_int *N);
	+mp_err mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *ct);
	+mp_err mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i, mp_int *c);
	+
	/* Comparisons */
	int mp_cmp_z(const mp_int *a);
	int mp_cmp_d(const mp_int *a, mp_digit d);
	@@ -224,6 +264,7 @@
	int mp_cmp_mag(const mp_int *a, const mp_int *b);
	int mp_isodd(const mp_int *a);
	int mp_iseven(const mp_int *a);
	+mp_err mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret);

	/* Number theoretic */
	mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c);
	--- a/lib/freebl/mpi/mpmontg.c
	+++ b/lib/freebl/mpi/mpmontg.c
	@@ -129,20 +129,27 @@
	}
	#endif

	-STATIC
	mp_err
	-s_mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont)
	+mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont)
	{
	mp_err res;

	/* xMont = x * R mod N where N is modulus */
	- MP_CHECKOK(mp_copy(x, xMont));
	- MP_CHECKOK(s_mp_lshd(xMont, MP_USED(&mmm->N))); /* xMont = x << b */
	- MP_CHECKOK(mp_div(xMont, &mmm->N, 0, xMont)); /* mod N */
	+ if (x != xMont) {
	+ MP_CHECKOK(mp_copy(x, xMont));
	+ }
	+ MP_CHECKOK(s_mp_lshd(xMont, MP_USED(N))); /* xMont = x << b */
	+ MP_CHECKOK(mp_div(xMont, N, 0, xMont)); /* mod N */
	CLEANUP:
	return res;
	}

	+mp_digit
	+mp_calculate_mont_n0i(const mp_int *N)
	+{
	+ return 0 - s_mp_invmod_radix(MP_DIGIT(N, 0));
	+}
	+
	#ifdef MP_USING_MONT_MULF

	/* the floating point multiply is already cache safe,
	@@ -198,7 +205,7 @@
	MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));

	mp_set(&accum1, 1);
	- MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
	+ MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
	MP_CHECKOK(s_mp_pad(&accum1, nLen));

	oddPowSize = 2 * nLen + 1;
	@@ -478,7 +485,7 @@

	/* set accumulator to montgomery residue of 1 */
	mp_set(&accum1, 1);
	- MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
	+ MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
	pa1 = &accum1;
	pa2 = &accum2;

	@@ -865,7 +872,7 @@
	MP_CHECKOK(mp_init_size(&accum[2], 3 * nLen + 2));
	MP_CHECKOK(mp_init_size(&accum[3], 3 * nLen + 2));
	mp_set(&accum[0], 1);
	- MP_CHECKOK(s_mp_to_mont(&accum[0], mmm, &accum[0]));
	+ MP_CHECKOK(mp_to_mont(&accum[0], &(mmm->N), &accum[0]));
	MP_CHECKOK(mp_copy(montBase, &accum[1]));
	SQR(montBase, &accum[2]);
	MUL_NOWEAVE(montBase, &accum[2], &accum[3]);
	@@ -884,7 +891,7 @@
	} else {
	if (first_window == 0) {
	mp_set(&accum1, 1);
	- MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
	+ MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
	} else {
	/* assert first_window == 1? */
	MP_CHECKOK(mp_copy(montBase, &accum1));
	@@ -1051,9 +1058,9 @@
	/* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX
	** where n0 = least significant mp_digit of N, the modulus.
	*/
	- mmm.n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(modulus, 0));
	+ mmm.n0prime = mp_calculate_mont_n0i(modulus);

	- MP_CHECKOK(s_mp_to_mont(base, &mmm, &montBase));
	+ MP_CHECKOK(mp_to_mont(base, modulus, &montBase));

	bits_in_exponent = mpl_significant_bits(exponent);
	#ifdef MP_USING_CACHE_SAFE_MOD_EXP
	--- a/lib/freebl/rsa.c
	+++ b/lib/freebl/rsa.c
	@@ -64,6 +64,8 @@
	SECItem modulus; /* list element "key" */
	blindingParams free, bp; /* Blinding parameters queue */
	blindingParams array[RSA_BLINDING_PARAMS_MAX_CACHE_SIZE];
	+ /* precalculate montgomery reduction value */
	+ mp_digit n0i; /* n0i = -( n & MP_DIGIT) ** -1 mod mp_RADIX */
	};
	typedef struct RSABlindingParamsStr RSABlindingParams;

	@@ -1146,6 +1148,8 @@
	CHECK_MPI_OK(mp_exptmod(&k, &e, n, f));
	/* g = k**-1 mod n */
	CHECK_MPI_OK(mp_invmod(&k, n, g));
	+ /* g in montgomery form.. */
	+ CHECK_MPI_OK(mp_to_mont(g, n, g));
	cleanup:
	if (kb)
	PORT_ZFree(kb, modLen);
	@@ -1182,13 +1186,16 @@
	rsabp->bp = NULL;
	rsabp->free = bp;

	+ /* precalculate montgomery reduction parameter */
	+ rsabp->n0i = mp_calculate_mont_n0i(n);
	+
	/* List elements are keyed using the modulus */
	return SECITEM_CopyItem(NULL, &rsabp->modulus, &key->modulus);
	}

	static SECStatus
	get_blinding_params(RSAPrivateKey *key, mp_int *n, unsigned int modLen,
	- mp_int *f, mp_int *g)
	+ mp_int *f, mp_int *g, mp_digit *n0i)
	{
	RSABlindingParams *rsabp = NULL;
	blindingParams *bpUnlinked = NULL;
	@@ -1248,6 +1255,7 @@
	/* We've found (or created) the RSAblindingParams struct for this key.
	* Now, search its list of ready blinding params for a usable one.
	*/
	+ *n0i = rsabp->n0i;
	while (0 != (bp = rsabp->bp)) {
	#ifndef UNSAFE_FUZZER_MODE
	if (--(bp->counter) > 0)
	@@ -1355,6 +1363,7 @@
	if (err) {
	MP_TO_SEC_ERROR(err);
	}
	+ *n0i = 0;
	return SECFailure;
	}

	@@ -1374,6 +1383,7 @@
	mp_err err;
	mp_int n, c, m;
	mp_int f, g;
	+ mp_digit n0i;
	if (!key || !output || !input) {
	PORT_SetError(SEC_ERROR_INVALID_ARGS);
	return SECFailure;
	@@ -1401,7 +1411,7 @@
	** blinding factor
	*/
	if (nssRSAUseBlinding) {
	- CHECK_SEC_OK(get_blinding_params(key, &n, modLen, &f, &g));
	+ CHECK_SEC_OK(get_blinding_params(key, &n, modLen, &f, &g, &n0i));
	/* c' = c*f mod n */
	CHECK_MPI_OK(mp_mulmod(&c, &f, &n, &c));
	}
	@@ -1422,7 +1432,7 @@
	*/
	if (nssRSAUseBlinding) {
	/* m = m'*g mod n */
	- CHECK_MPI_OK(mp_mulmod(&m, &g, &n, &m));
	+ CHECK_MPI_OK(mp_mulmontmodCT(&m, &g, &n, n0i, &m));
	}
	err = mp_to_fixlen_octets(&m, output, modLen);
	if (err >= 0)
	--- a/lib/freebl/mpi/mpi-priv.h
	+++ b/lib/freebl/mpi/mpi-priv.h
	@@ -204,6 +204,9 @@
	void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a,
	mp_size a_len, mp_digit b,
	mp_digit *c);
	+void MPI_ASM_DECL s_mpv_mul_d_add_propCT(const mp_digit *a,
	+ mp_size a_len, mp_digit b,
	+ mp_digit *c, mp_size c_len);
	void MPI_ASM_DECL s_mpv_sqr_add_prop(const mp_digit *a,
	mp_size a_len,
	mp_digit *sqrs);