/*
 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

/*
 * The core AEC algorithm, which is presented with time-aligned signals.
 */

#include "modules/audio_processing/aec/aec_core.h"

#include <math.h>

extern "C" {
#include "common_audio/signal_processing/include/signal_processing_library.h"
}
#include "modules/audio_processing/aec/aec_core_optimized_methods.h"
#include "modules/audio_processing/utility/ooura_fft.h"

namespace webrtc {

extern const float WebRtcAec_weightCurve[65];
extern const float WebRtcAec_overDriveCurve[65];

void WebRtcAec_FilterFar_mips(
    int num_partitions,
    int x_fft_buf_block_pos,
    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
    float y_fft[2][PART_LEN1]) {
  int i;
  for (i = 0; i < num_partitions; i++) {
    int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
    int pos = i * PART_LEN1;
    // Check for wrap
    if (i + x_fft_buf_block_pos >= num_partitions) {
      xPos -= num_partitions * (PART_LEN1);
    }
    float* yf0 = y_fft[0];
    float* yf1 = y_fft[1];
    float* aRe = x_fft_buf[0] + xPos;
    float* aIm = x_fft_buf[1] + xPos;
    float* bRe = h_fft_buf[0] + pos;
    float* bIm = h_fft_buf[1] + pos;
    float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13;
    int len = PART_LEN1 >> 1;

    __asm __volatile(
        ".set       push                                                \n\t"
        ".set       noreorder                                           \n\t"
        "1:                                                             \n\t"
        "lwc1       %[f0],      0(%[aRe])                               \n\t"
        "lwc1       %[f1],      0(%[bRe])                               \n\t"
        "lwc1       %[f2],      0(%[bIm])                               \n\t"
        "lwc1       %[f3],      0(%[aIm])                               \n\t"
        "lwc1       %[f4],      4(%[aRe])                               \n\t"
        "lwc1       %[f5],      4(%[bRe])                               \n\t"
        "lwc1       %[f6],      4(%[bIm])                               \n\t"
        "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
        "mul.s      %[f0],      %[f0],          %[f2]                   \n\t"
        "mul.s      %[f9],      %[f4],          %[f5]                   \n\t"
        "mul.s      %[f4],      %[f4],          %[f6]                   \n\t"
        "lwc1       %[f7],      4(%[aIm])                               \n\t"
#if !defined(MIPS32_R2_LE)
        "mul.s      %[f12],     %[f2],          %[f3]                   \n\t"
        "mul.s      %[f1],      %[f3],          %[f1]                   \n\t"
        "mul.s      %[f11],     %[f6],          %[f7]                   \n\t"
        "addiu      %[aRe],     %[aRe],         8                       \n\t"
        "addiu      %[aIm],     %[aIm],         8                       \n\t"
        "addiu      %[len],     %[len],         -1                      \n\t"
        "sub.s      %[f8],      %[f8],          %[f12]                  \n\t"
        "mul.s      %[f12],     %[f7],          %[f5]                   \n\t"
        "lwc1       %[f2],      0(%[yf0])                               \n\t"
        "add.s      %[f1],      %[f0],          %[f1]                   \n\t"
        "lwc1       %[f3],      0(%[yf1])                               \n\t"
        "sub.s      %[f9],      %[f9],          %[f11]                  \n\t"
        "lwc1       %[f6],      4(%[yf0])                               \n\t"
        "add.s      %[f4],      %[f4],          %[f12]                  \n\t"
#else   // #if !defined(MIPS32_R2_LE)
        "addiu      %[aRe],     %[aRe],         8                       \n\t"
        "addiu      %[aIm],     %[aIm],         8                       \n\t"
        "addiu      %[len],     %[len],         -1                      \n\t"
        "nmsub.s    %[f8],      %[f8],          %[f2],      %[f3]       \n\t"
        "lwc1       %[f2],      0(%[yf0])                               \n\t"
        "madd.s     %[f1],      %[f0],          %[f3],      %[f1]       \n\t"
        "lwc1       %[f3],      0(%[yf1])                               \n\t"
        "nmsub.s    %[f9],      %[f9],          %[f6],      %[f7]       \n\t"
        "lwc1       %[f6],      4(%[yf0])                               \n\t"
        "madd.s     %[f4],      %[f4],          %[f7],      %[f5]       \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
        "lwc1       %[f5],      4(%[yf1])                               \n\t"
        "add.s      %[f2],      %[f2],          %[f8]                   \n\t"
        "addiu      %[bRe],     %[bRe],         8                       \n\t"
        "addiu      %[bIm],     %[bIm],         8                       \n\t"
        "add.s      %[f3],      %[f3],          %[f1]                   \n\t"
        "add.s      %[f6],      %[f6],          %[f9]                   \n\t"
        "add.s      %[f5],      %[f5],          %[f4]                   \n\t"
        "swc1       %[f2],      0(%[yf0])                               \n\t"
        "swc1       %[f3],      0(%[yf1])                               \n\t"
        "swc1       %[f6],      4(%[yf0])                               \n\t"
        "swc1       %[f5],      4(%[yf1])                               \n\t"
        "addiu      %[yf0],     %[yf0],         8                       \n\t"
        "bgtz       %[len],     1b                                      \n\t"
        " addiu     %[yf1],     %[yf1],         8                       \n\t"
        "lwc1       %[f0],      0(%[aRe])                               \n\t"
        "lwc1       %[f1],      0(%[bRe])                               \n\t"
        "lwc1       %[f2],      0(%[bIm])                               \n\t"
        "lwc1       %[f3],      0(%[aIm])                               \n\t"
        "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
        "mul.s      %[f0],      %[f0],          %[f2]                   \n\t"
#if !defined(MIPS32_R2_LE)
        "mul.s      %[f12],     %[f2],          %[f3]                   \n\t"
        "mul.s      %[f1],      %[f3],          %[f1]                   \n\t"
        "sub.s      %[f8],      %[f8],          %[f12]                  \n\t"
        "lwc1       %[f2],      0(%[yf0])                               \n\t"
        "add.s      %[f1],      %[f0],          %[f1]                   \n\t"
        "lwc1       %[f3],      0(%[yf1])                               \n\t"
#else   // #if !defined(MIPS32_R2_LE)
        "nmsub.s    %[f8],      %[f8],          %[f2],      %[f3]       \n\t"
        "lwc1       %[f2],      0(%[yf0])                               \n\t"
        "madd.s     %[f1],      %[f0],          %[f3],      %[f1]       \n\t"
        "lwc1       %[f3],      0(%[yf1])                               \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
        "add.s      %[f2],      %[f2],          %[f8]                   \n\t"
        "add.s      %[f3],      %[f3],          %[f1]                   \n\t"
        "swc1       %[f2],      0(%[yf0])                               \n\t"
        "swc1       %[f3],      0(%[yf1])                               \n\t"
        ".set       pop                                                 \n\t"
        : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3),
          [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7),
          [f8] "=&f"(f8), [f9] "=&f"(f9), [f10] "=&f"(f10), [f11] "=&f"(f11),
          [f12] "=&f"(f12), [f13] "=&f"(f13), [aRe] "+r"(aRe), [aIm] "+r"(aIm),
          [bRe] "+r"(bRe), [bIm] "+r"(bIm), [yf0] "+r"(yf0), [yf1] "+r"(yf1),
          [len] "+r"(len)
        :
        : "memory");
  }
}

void WebRtcAec_FilterAdaptation_mips(
    const OouraFft& ooura_fft,
    int num_partitions,
    int x_fft_buf_block_pos,
    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
    float e_fft[2][PART_LEN1],
    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
  float fft[PART_LEN2];
  int i;
  for (i = 0; i < num_partitions; i++) {
    int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1);
    int pos;
    // Check for wrap
    if (i + x_fft_buf_block_pos >= num_partitions) {
      xPos -= num_partitions * PART_LEN1;
    }

    pos = i * PART_LEN1;
    float* aRe = x_fft_buf[0] + xPos;
    float* aIm = x_fft_buf[1] + xPos;
    float* bRe = e_fft[0];
    float* bIm = e_fft[1];
    float* fft_tmp;

    float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12;
    int len = PART_LEN >> 1;

    __asm __volatile(
        ".set       push                                                \n\t"
        ".set       noreorder                                           \n\t"
        "addiu      %[fft_tmp], %[fft],         0                       \n\t"
        "1:                                                             \n\t"
        "lwc1       %[f0],      0(%[aRe])                               \n\t"
        "lwc1       %[f1],      0(%[bRe])                               \n\t"
        "lwc1       %[f2],      0(%[bIm])                               \n\t"
        "lwc1       %[f4],      4(%[aRe])                               \n\t"
        "lwc1       %[f5],      4(%[bRe])                               \n\t"
        "lwc1       %[f6],      4(%[bIm])                               \n\t"
        "addiu      %[aRe],     %[aRe],         8                       \n\t"
        "addiu      %[bRe],     %[bRe],         8                       \n\t"
        "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
        "mul.s      %[f0],      %[f0],          %[f2]                   \n\t"
        "lwc1       %[f3],      0(%[aIm])                               \n\t"
        "mul.s      %[f9],      %[f4],          %[f5]                   \n\t"
        "lwc1       %[f7],      4(%[aIm])                               \n\t"
        "mul.s      %[f4],      %[f4],          %[f6]                   \n\t"
#if !defined(MIPS32_R2_LE)
        "mul.s      %[f10],     %[f3],          %[f2]                   \n\t"
        "mul.s      %[f1],      %[f3],          %[f1]                   \n\t"
        "mul.s      %[f11],     %[f7],          %[f6]                   \n\t"
        "mul.s      %[f5],      %[f7],          %[f5]                   \n\t"
        "addiu      %[aIm],     %[aIm],         8                       \n\t"
        "addiu      %[bIm],     %[bIm],         8                       \n\t"
        "addiu      %[len],     %[len],         -1                      \n\t"
        "add.s      %[f8],      %[f8],          %[f10]                  \n\t"
        "sub.s      %[f1],      %[f0],          %[f1]                   \n\t"
        "add.s      %[f9],      %[f9],          %[f11]                  \n\t"
        "sub.s      %[f5],      %[f4],          %[f5]                   \n\t"
#else   // #if !defined(MIPS32_R2_LE)
        "addiu      %[aIm],     %[aIm],         8                       \n\t"
        "addiu      %[bIm],     %[bIm],         8                       \n\t"
        "addiu      %[len],     %[len],         -1                      \n\t"
        "madd.s     %[f8],      %[f8],          %[f3],      %[f2]       \n\t"
        "nmsub.s    %[f1],      %[f0],          %[f3],      %[f1]       \n\t"
        "madd.s     %[f9],      %[f9],          %[f7],      %[f6]       \n\t"
        "nmsub.s    %[f5],      %[f4],          %[f7],      %[f5]       \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
        "swc1       %[f8],      0(%[fft_tmp])                           \n\t"
        "swc1       %[f1],      4(%[fft_tmp])                           \n\t"
        "swc1       %[f9],      8(%[fft_tmp])                           \n\t"
        "swc1       %[f5],      12(%[fft_tmp])                          \n\t"
        "bgtz       %[len],     1b                                      \n\t"
        " addiu     %[fft_tmp], %[fft_tmp],     16                      \n\t"
        "lwc1       %[f0],      0(%[aRe])                               \n\t"
        "lwc1       %[f1],      0(%[bRe])                               \n\t"
        "lwc1       %[f2],      0(%[bIm])                               \n\t"
        "lwc1       %[f3],      0(%[aIm])                               \n\t"
        "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
#if !defined(MIPS32_R2_LE)
        "mul.s      %[f10],     %[f3],          %[f2]                   \n\t"
        "add.s      %[f8],      %[f8],          %[f10]                  \n\t"
#else   // #if !defined(MIPS32_R2_LE)
        "madd.s     %[f8],      %[f8],          %[f3],      %[f2]       \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
        "swc1       %[f8],      4(%[fft])                               \n\t"
        ".set       pop                                                 \n\t"
        : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3),
          [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7),
          [f8] "=&f"(f8), [f9] "=&f"(f9), [f10] "=&f"(f10), [f11] "=&f"(f11),
          [f12] "=&f"(f12), [aRe] "+r"(aRe), [aIm] "+r"(aIm), [bRe] "+r"(bRe),
          [bIm] "+r"(bIm), [fft_tmp] "=&r"(fft_tmp), [len] "+r"(len)
        : [fft] "r"(fft)
        : "memory");

    ooura_fft.InverseFft(fft);
    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);

    // fft scaling
    {
      float scale = 2.0f / PART_LEN2;
      __asm __volatile(
          ".set     push                                    \n\t"
          ".set     noreorder                               \n\t"
          "addiu    %[fft_tmp], %[fft],        0            \n\t"
          "addiu    %[len],     $zero,         8            \n\t"
          "1:                                               \n\t"
          "addiu    %[len],     %[len],        -1           \n\t"
          "lwc1     %[f0],      0(%[fft_tmp])               \n\t"
          "lwc1     %[f1],      4(%[fft_tmp])               \n\t"
          "lwc1     %[f2],      8(%[fft_tmp])               \n\t"
          "lwc1     %[f3],      12(%[fft_tmp])              \n\t"
          "mul.s    %[f0],      %[f0],         %[scale]     \n\t"
          "mul.s    %[f1],      %[f1],         %[scale]     \n\t"
          "mul.s    %[f2],      %[f2],         %[scale]     \n\t"
          "mul.s    %[f3],      %[f3],         %[scale]     \n\t"
          "lwc1     %[f4],      16(%[fft_tmp])              \n\t"
          "lwc1     %[f5],      20(%[fft_tmp])              \n\t"
          "lwc1     %[f6],      24(%[fft_tmp])              \n\t"
          "lwc1     %[f7],      28(%[fft_tmp])              \n\t"
          "mul.s    %[f4],      %[f4],         %[scale]     \n\t"
          "mul.s    %[f5],      %[f5],         %[scale]     \n\t"
          "mul.s    %[f6],      %[f6],         %[scale]     \n\t"
          "mul.s    %[f7],      %[f7],         %[scale]     \n\t"
          "swc1     %[f0],      0(%[fft_tmp])               \n\t"
          "swc1     %[f1],      4(%[fft_tmp])               \n\t"
          "swc1     %[f2],      8(%[fft_tmp])               \n\t"
          "swc1     %[f3],      12(%[fft_tmp])              \n\t"
          "swc1     %[f4],      16(%[fft_tmp])              \n\t"
          "swc1     %[f5],      20(%[fft_tmp])              \n\t"
          "swc1     %[f6],      24(%[fft_tmp])              \n\t"
          "swc1     %[f7],      28(%[fft_tmp])              \n\t"
          "bgtz     %[len],     1b                          \n\t"
          " addiu   %[fft_tmp], %[fft_tmp],    32           \n\t"
          ".set     pop                                     \n\t"
          : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3),
            [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7),
            [len] "=&r"(len), [fft_tmp] "=&r"(fft_tmp)
          : [scale] "f"(scale), [fft] "r"(fft)
          : "memory");
    }
    ooura_fft.Fft(fft);
    aRe = h_fft_buf[0] + pos;
    aIm = h_fft_buf[1] + pos;
    __asm __volatile(
        ".set     push                                    \n\t"
        ".set     noreorder                               \n\t"
        "addiu    %[fft_tmp], %[fft],        0            \n\t"
        "addiu    %[len],     $zero,         31           \n\t"
        "lwc1     %[f0],      0(%[aRe])                   \n\t"
        "lwc1     %[f1],      0(%[fft_tmp])               \n\t"
        "lwc1     %[f2],      256(%[aRe])                 \n\t"
        "lwc1     %[f3],      4(%[fft_tmp])               \n\t"
        "lwc1     %[f4],      4(%[aRe])                   \n\t"
        "lwc1     %[f5],      8(%[fft_tmp])               \n\t"
        "lwc1     %[f6],      4(%[aIm])                   \n\t"
        "lwc1     %[f7],      12(%[fft_tmp])              \n\t"
        "add.s    %[f0],      %[f0],         %[f1]        \n\t"
        "add.s    %[f2],      %[f2],         %[f3]        \n\t"
        "add.s    %[f4],      %[f4],         %[f5]        \n\t"
        "add.s    %[f6],      %[f6],         %[f7]        \n\t"
        "addiu    %[fft_tmp], %[fft_tmp],    16           \n\t"
        "swc1     %[f0],      0(%[aRe])                   \n\t"
        "swc1     %[f2],      256(%[aRe])                 \n\t"
        "swc1     %[f4],      4(%[aRe])                   \n\t"
        "addiu    %[aRe],     %[aRe],        8            \n\t"
        "swc1     %[f6],      4(%[aIm])                   \n\t"
        "addiu    %[aIm],     %[aIm],        8            \n\t"
        "1:                                               \n\t"
        "lwc1     %[f0],      0(%[aRe])                   \n\t"
        "lwc1     %[f1],      0(%[fft_tmp])               \n\t"
        "lwc1     %[f2],      0(%[aIm])                   \n\t"
        "lwc1     %[f3],      4(%[fft_tmp])               \n\t"
        "lwc1     %[f4],      4(%[aRe])                   \n\t"
        "lwc1     %[f5],      8(%[fft_tmp])               \n\t"
        "lwc1     %[f6],      4(%[aIm])                   \n\t"
        "lwc1     %[f7],      12(%[fft_tmp])              \n\t"
        "add.s    %[f0],      %[f0],         %[f1]        \n\t"
        "add.s    %[f2],      %[f2],         %[f3]        \n\t"
        "add.s    %[f4],      %[f4],         %[f5]        \n\t"
        "add.s    %[f6],      %[f6],         %[f7]        \n\t"
        "addiu    %[len],     %[len],        -1           \n\t"
        "addiu    %[fft_tmp], %[fft_tmp],    16           \n\t"
        "swc1     %[f0],      0(%[aRe])                   \n\t"
        "swc1     %[f2],      0(%[aIm])                   \n\t"
        "swc1     %[f4],      4(%[aRe])                   \n\t"
        "addiu    %[aRe],     %[aRe],        8            \n\t"
        "swc1     %[f6],      4(%[aIm])                   \n\t"
        "bgtz     %[len],     1b                          \n\t"
        " addiu   %[aIm],     %[aIm],        8            \n\t"
        ".set     pop                                     \n\t"
        : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3),
          [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7),
          [len] "=&r"(len), [fft_tmp] "=&r"(fft_tmp), [aRe] "+r"(aRe),
          [aIm] "+r"(aIm)
        : [fft] "r"(fft)
        : "memory");
  }
}

void WebRtcAec_Overdrive_mips(float overdrive_scaling,
                              float hNlFb,
                              float hNl[PART_LEN1]) {
  const float one = 1.0;
  float* p_hNl;
  const float* p_WebRtcAec_wC;
  float temp1, temp2, temp3, temp4;

  p_hNl = &hNl[0];
  p_WebRtcAec_wC = &WebRtcAec_weightCurve[0];

  for (int i = 0; i < PART_LEN1; ++i) {
    // Weight subbands
    __asm __volatile(
        ".set      push                                              \n\t"
        ".set      noreorder                                         \n\t"
        "lwc1      %[temp1],    0(%[p_hNl])                          \n\t"
        "lwc1      %[temp2],    0(%[p_wC])                           \n\t"
        "c.lt.s    %[hNlFb],    %[temp1]                             \n\t"
        "bc1f      1f                                                \n\t"
        " mul.s    %[temp3],    %[temp2],     %[hNlFb]               \n\t"
        "sub.s     %[temp4],    %[one],       %[temp2]               \n\t"
#if !defined(MIPS32_R2_LE)
        "mul.s     %[temp1],    %[temp1],     %[temp4]               \n\t"
        "add.s     %[temp1],    %[temp3],     %[temp1]               \n\t"
#else   // #if !defined(MIPS32_R2_LE)
        "madd.s    %[temp1],    %[temp3],     %[temp1],   %[temp4]   \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
        "swc1      %[temp1],    0(%[p_hNl])                          \n\t"
        "1:                                                           \n\t"
        "addiu     %[p_wC],     %[p_wC],      4                      \n\t"
        ".set      pop                                               \n\t"
        : [temp1] "=&f"(temp1), [temp2] "=&f"(temp2), [temp3] "=&f"(temp3),
          [temp4] "=&f"(temp4), [p_wC] "+r"(p_WebRtcAec_wC)
        : [hNlFb] "f"(hNlFb), [one] "f"(one), [p_hNl] "r"(p_hNl)
        : "memory");

    hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);
  }
}

void WebRtcAec_Suppress_mips(const float hNl[PART_LEN1],
                             float efw[2][PART_LEN1]) {
  const float* p_hNl;
  float* p_efw0;
  float* p_efw1;
  float temp1, temp2, temp3, temp4;

  p_hNl = &hNl[0];
  p_efw0 = &efw[0][0];
  p_efw1 = &efw[1][0];

  for (int i = 0; i < PART_LEN1; ++i) {
    __asm __volatile(
        "lwc1      %[temp1],    0(%[p_hNl])              \n\t"
        "lwc1      %[temp3],    0(%[p_efw1])             \n\t"
        "lwc1      %[temp2],    0(%[p_efw0])             \n\t"
        "addiu     %[p_hNl],    %[p_hNl],     4          \n\t"
        "mul.s     %[temp3],    %[temp3],     %[temp1]   \n\t"
        "mul.s     %[temp2],    %[temp2],     %[temp1]   \n\t"
        "addiu     %[p_efw0],   %[p_efw0],    4          \n\t"
        "addiu     %[p_efw1],   %[p_efw1],    4          \n\t"
        "neg.s     %[temp4],    %[temp3]                 \n\t"
        "swc1      %[temp2],    -4(%[p_efw0])            \n\t"
        "swc1      %[temp4],    -4(%[p_efw1])            \n\t"
        : [temp1] "=&f"(temp1), [temp2] "=&f"(temp2), [temp3] "=&f"(temp3),
          [temp4] "=&f"(temp4), [p_efw0] "+r"(p_efw0), [p_efw1] "+r"(p_efw1),
          [p_hNl] "+r"(p_hNl)
        :
        : "memory");
  }
}

void WebRtcAec_ScaleErrorSignal_mips(float mu,
                                     float error_threshold,
                                     float x_pow[PART_LEN1],
                                     float ef[2][PART_LEN1]) {
  int len = (PART_LEN1);
  float* ef0 = ef[0];
  float* ef1 = ef[1];
  float fac1 = 1e-10f;
  float err_th2 = error_threshold * error_threshold;
  float f0, f1, f2;
#if !defined(MIPS32_R2_LE)
  float f3;
#endif

  __asm __volatile(
      ".set       push                                   \n\t"
      ".set       noreorder                              \n\t"
      "1:                                                \n\t"
      "lwc1       %[f0],     0(%[x_pow])                 \n\t"
      "lwc1       %[f1],     0(%[ef0])                   \n\t"
      "lwc1       %[f2],     0(%[ef1])                   \n\t"
      "add.s      %[f0],     %[f0],       %[fac1]        \n\t"
      "div.s      %[f1],     %[f1],       %[f0]          \n\t"
      "div.s      %[f2],     %[f2],       %[f0]          \n\t"
      "mul.s      %[f0],     %[f1],       %[f1]          \n\t"
#if defined(MIPS32_R2_LE)
      "madd.s     %[f0],     %[f0],       %[f2],   %[f2] \n\t"
#else
      "mul.s      %[f3],     %[f2],       %[f2]          \n\t"
      "add.s      %[f0],     %[f0],       %[f3]          \n\t"
#endif
      "c.le.s     %[f0],     %[err_th2]                  \n\t"
      "nop                                               \n\t"
      "bc1t       2f                                     \n\t"
      " nop                                              \n\t"
      "sqrt.s     %[f0],     %[f0]                       \n\t"
      "add.s      %[f0],     %[f0],       %[fac1]        \n\t"
      "div.s      %[f0],     %[err_th],   %[f0]          \n\t"
      "mul.s      %[f1],     %[f1],       %[f0]          \n\t"
      "mul.s      %[f2],     %[f2],       %[f0]          \n\t"
      "2:                                                \n\t"
      "mul.s      %[f1],     %[f1],       %[mu]          \n\t"
      "mul.s      %[f2],     %[f2],       %[mu]          \n\t"
      "swc1       %[f1],     0(%[ef0])                   \n\t"
      "swc1       %[f2],     0(%[ef1])                   \n\t"
      "addiu      %[len],    %[len],      -1             \n\t"
      "addiu      %[x_pow],  %[x_pow],    4              \n\t"
      "addiu      %[ef0],    %[ef0],      4              \n\t"
      "bgtz       %[len],    1b                          \n\t"
      " addiu     %[ef1],    %[ef1],      4              \n\t"
      ".set       pop                                    \n\t"
      : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2),
#if !defined(MIPS32_R2_LE)
        [f3] "=&f"(f3),
#endif
        [x_pow] "+r"(x_pow), [ef0] "+r"(ef0), [ef1] "+r"(ef1), [len] "+r"(len)
      : [fac1] "f"(fac1), [err_th2] "f"(err_th2), [mu] "f"(mu),
        [err_th] "f"(error_threshold)
      : "memory");
}

void WebRtcAec_InitAec_mips(void) {
  WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
  WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;
  WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;
  WebRtcAec_Overdrive = WebRtcAec_Overdrive_mips;
  WebRtcAec_Suppress = WebRtcAec_Suppress_mips;
}
}  // namespace webrtc
