| /* |
| * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include <string.h> |
| |
| #include "rtc_base/checks.h" |
| #include "modules/audio_processing/ns/noise_suppression_x.h" |
| #include "modules/audio_processing/ns/nsx_core.h" |
| |
| static const int16_t kIndicatorTable[17] = { /* 17 breakpoints; WebRtcNsx_SpeechNoiseProb interpolates linearly between entries [k] and [k + 1] for k in 0..15 */ |
| 0, 2017, 3809, 5227, 6258, 6963, 7424, 7718, /* entries 0..7; each value is added to or subtracted from 8192 (0.5 in Q14) by the caller */ |
| 7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187 /* entries 8..16; presumably samples of the 0.5 * tanh() term of the FLOAT reference - TODO confirm */ |
| }; |
| |
| // Compute the speech/noise probability for every frequency bin (MIPS inline-assembly version). |
| // The result is returned in nonSpeechProbFinal; inst->logLrtTimeAvgW32[], inst->featureLogLrt and inst->priorNonSpeechProb are updated as a side effect. |
| // priorLocSnr is the prior SNR for each frequency (in Q11) |
| // postLocSnr is the post SNR for each frequency (in Q11) |
| void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst, /* in/out: noise-suppression state */ |
| uint16_t* nonSpeechProbFinal, /* out: inst->magnLen entries; zeroed first, then filled per bin */ |
| uint32_t* priorLocSnr, /* in: read only, inst->magnLen entries */ |
| uint32_t* postLocSnr) { /* in: read only, inst->magnLen entries */ |
| uint32_t tmpU32no1, tmpU32no2, tmpU32no3; |
| int32_t indPriorFX, tmp32no1; |
| int32_t logLrtTimeAvgKsumFX; |
| int16_t indPriorFX16; |
| int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac; |
| size_t i; |
| int normTmp, nShifts; |
| /* r0..r9 are the scratch/value registers bound to the asm operands; the const_* values are passed to the asm in registers */ |
| int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; |
| int32_t const_max = 0x7fffffff; |
| int32_t const_neg43 = -43; |
| int32_t const_5412 = 5412; |
| int32_t const_11rsh12 = (11 << 12); /* despite the "rsh" in the name this is 11 shifted LEFT by 12 */ |
| int32_t const_178 = 178; |
| |
| |
| // compute feature based on average LR factor |
| // this is the average over all frequencies of the smooth log LRT |
| logLrtTimeAvgKsumFX = 0; |
| for (i = 0; i < inst->magnLen; i++) { |
| r0 = postLocSnr[i]; // Q11 |
| r1 = priorLocSnr[i]; |
| r2 = inst->logLrtTimeAvgW32[i]; |
| /* On exit from the asm only r2 is used: it holds the updated inst->logLrtTimeAvgW32[i] */ |
| __asm __volatile( |
| ".set push \n\t" |
| ".set noreorder \n\t" |
| "clz %[r3], %[r0] \n\t" /* r3 = leading zeros of postLocSnr[i] */ |
| "clz %[r5], %[r1] \n\t" /* r5 = leading zeros of priorLocSnr[i] */ |
| "slti %[r4], %[r3], 32 \n\t" |
| "slti %[r6], %[r5], 32 \n\t" |
| "movz %[r3], $0, %[r4] \n\t" /* a count of 32 (operand was 0) is replaced by 0 */ |
| "movz %[r5], $0, %[r6] \n\t" |
| "slti %[r4], %[r3], 11 \n\t" |
| "addiu %[r6], %[r3], -11 \n\t" |
| "neg %[r7], %[r6] \n\t" |
| "sllv %[r6], %[r1], %[r6] \n\t" |
| "srav %[r7], %[r1], %[r7] \n\t" |
| "movn %[r6], %[r7], %[r4] \n\t" /* r6 = priorLocSnr[i] << (r3 - 11), or >> (11 - r3) when r3 < 11 */ |
| "sllv %[r1], %[r1], %[r5] \n\t" |
| "and %[r1], %[r1], %[const_max] \n\t" |
| "sra %[r1], %[r1], 19 \n\t" /* r1 = 12-bit fraction of the normalized prior SNR */ |
| "mul %[r7], %[r1], %[r1] \n\t" |
| "sllv %[r3], %[r0], %[r3] \n\t" |
| "divu %[r8], %[r3], %[r6] \n\t" /* r8 = (postLocSnr[i] << r3) / r6 */ |
| "slti %[r6], %[r6], 1 \n\t" /* r6 = 1 when the divisor is <= 0 */ |
| "mul %[r7], %[r7], %[const_neg43] \n\t" |
| "sra %[r7], %[r7], 19 \n\t" |
| "movz %[r3], %[r8], %[r6] \n\t" |
| "subu %[r0], %[r0], %[r3] \n\t" |
| "movn %[r0], $0, %[r6] \n\t" /* r0 = postLocSnr[i] - quotient, or 0 when the divisor is <= 0 */ |
| "mul %[r1], %[r1], %[const_5412] \n\t" |
| "sra %[r1], %[r1], 12 \n\t" |
| "addu %[r7], %[r7], %[r1] \n\t" |
| "addiu %[r1], %[r7], 37 \n\t" |
| "addiu %[r5], %[r5], -31 \n\t" |
| "neg %[r5], %[r5] \n\t" |
| "sll %[r5], %[r5], 12 \n\t" |
| "addu %[r5], %[r5], %[r1] \n\t" /* r5 = ((31 - r5) << 12) + quadratic polynomial of the fraction */ |
| "subu %[r7], %[r5], %[const_11rsh12] \n\t" |
| "mul %[r7], %[r7], %[const_178] \n\t" |
| "sra %[r7], %[r7], 8 \n\t" |
| "addu %[r7], %[r7], %[r2] \n\t" |
| "sra %[r7], %[r7], 1 \n\t" /* r7 = (scaled log term + logLrtTimeAvgW32[i]) >> 1 */ |
| "subu %[r2], %[r2], %[r7] \n\t" |
| "addu %[r2], %[r2], %[r0] \n\t" /* r2 = logLrtTimeAvgW32[i] - r7 + r0 */ |
| ".set pop \n\t" |
| : [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2), |
| [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), |
| [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8) |
| : [const_max] "r" (const_max), [const_neg43] "r" (const_neg43), |
| [const_5412] "r" (const_5412), [const_11rsh12] "r" (const_11rsh12), |
| [const_178] "r" (const_178) |
| : "hi", "lo" |
| ); |
| inst->logLrtTimeAvgW32[i] = r2; |
| logLrtTimeAvgKsumFX += r2; /* the sum is taken over the already updated values */ |
| } |
| |
| inst->featureLogLrt = (logLrtTimeAvgKsumFX * BIN_SIZE_LRT) >> |
| (inst->stages + 11); |
| |
| // done with computation of LR factor |
| |
| // |
| // compute the indicator functions |
| // |
| |
| // average LRT feature |
| // FLOAT code |
| // indicator0 = 0.5 * (tanh(widthPrior * |
| // (logLrtTimeAvgKsum - threshPrior0)) + 1.0); |
| tmpIndFX = 16384; // Q14(1.0) |
| tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12 |
| nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5; |
| //use larger width in tanh map for pause regions |
| if (tmp32no1 < 0) { |
| tmpIndFX = 0; /* 0 doubles as the "argument was negative" flag tested after the table lookup */ |
| tmp32no1 = -tmp32no1; |
| //widthPrior = widthPrior * 2.0; |
| nShifts++; |
| } |
| tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14 |
| // compute indicator function: sigmoid map |
| if (tmp32no1 < (16 << 14) && tmp32no1 >= 0) { /* outside this range tmpIndFX keeps its saturated value (0 or 16384) */ |
| tableIndex = (int16_t)(tmp32no1 >> 14); |
| tmp16no2 = kIndicatorTable[tableIndex]; |
| tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; |
| frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14 |
| tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14); /* linear interpolation between the two table entries */ |
| if (tmpIndFX == 0) { |
| tmpIndFX = 8192 - tmp16no2; // Q14 |
| } else { |
| tmpIndFX = 8192 + tmp16no2; // Q14 |
| } |
| } |
| indPriorFX = inst->weightLogLrt * tmpIndFX; // 6*Q14 |
| |
| //spectral flatness feature |
| if (inst->weightSpecFlat) { |
| tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10 |
| tmpIndFX = 16384; // Q14(1.0) |
| //use larger width in tanh map for pause regions |
| tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10 |
| nShifts = 4; |
| if (inst->thresholdSpecFlat < tmpU32no1) { /* unsigned difference would wrap: take the magnitude and remember the sign in tmpIndFX */ |
| tmpIndFX = 0; |
| tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat; |
| //widthPrior = widthPrior * 2.0; |
| nShifts++; |
| } |
| tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25); //Q14 |
| // compute indicator function: sigmoid map |
| // FLOAT code |
| // indicator1 = 0.5 * (tanh(sgnMap * widthPrior * |
| // (threshPrior1 - tmpFloat1)) + 1.0); |
| if (tmpU32no1 < (16 << 14)) { |
| tableIndex = (int16_t)(tmpU32no1 >> 14); |
| tmp16no2 = kIndicatorTable[tableIndex]; |
| tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; |
| frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 |
| tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14); |
| if (tmpIndFX) { |
| tmpIndFX = 8192 + tmp16no2; // Q14 |
| } else { |
| tmpIndFX = 8192 - tmp16no2; // Q14 |
| } |
| } |
| indPriorFX += inst->weightSpecFlat * tmpIndFX; // 6*Q14 |
| } |
| |
| //for template spectral-difference |
| if (inst->weightSpecDiff) { |
| tmpU32no1 = 0; |
| if (inst->featureSpecDiff) { |
| normTmp = WEBRTC_SPL_MIN(20 - inst->stages, |
| WebRtcSpl_NormU32(inst->featureSpecDiff)); |
| RTC_DCHECK_GE(normTmp, 0); |
| tmpU32no1 = inst->featureSpecDiff << normTmp; // Q(normTmp-2*stages) |
| tmpU32no2 = inst->timeAvgMagnEnergy >> (20 - inst->stages - normTmp); |
| if (tmpU32no2 > 0) { |
| // Q(20 - inst->stages) |
| tmpU32no1 /= tmpU32no2; |
| } else { |
| tmpU32no1 = (uint32_t)(0x7fffffff); /* divisor shifted down to zero: use the largest positive value instead of dividing */ |
| } |
| } |
| tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25; |
| tmpU32no2 = tmpU32no1 - tmpU32no3; |
| nShifts = 1; |
| tmpIndFX = 16384; // Q14(1.0) |
| //use larger width in tanh map for pause regions |
| if (tmpU32no2 & 0x80000000) { /* top bit set: the unsigned subtraction is treated as a negative result */ |
| tmpIndFX = 0; |
| tmpU32no2 = tmpU32no3 - tmpU32no1; |
| //widthPrior = widthPrior * 2.0; |
| nShifts--; |
| } |
| tmpU32no1 = tmpU32no2 >> nShifts; |
| // compute indicator function: sigmoid map |
| /* FLOAT code |
| indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0); |
| */ |
| if (tmpU32no1 < (16 << 14)) { |
| tableIndex = (int16_t)(tmpU32no1 >> 14); |
| tmp16no2 = kIndicatorTable[tableIndex]; |
| tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; |
| frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 |
| tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( |
| tmp16no1, frac, 14); |
| if (tmpIndFX) { |
| tmpIndFX = 8192 + tmp16no2; |
| } else { |
| tmpIndFX = 8192 - tmp16no2; |
| } |
| } |
| indPriorFX += inst->weightSpecDiff * tmpIndFX; // 6*Q14 |
| } |
| |
| //combine the indicator function with the feature weights |
| // FLOAT code |
| // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 * |
| // indicator1 + weightIndPrior2 * indicator2); |
| indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14 |
| // done with computing indicator function |
| |
| //compute the prior probability |
| // FLOAT code |
| // inst->priorNonSpeechProb += PRIOR_UPDATE * |
| // (indPriorNonSpeech - inst->priorNonSpeechProb); |
| tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14 |
| inst->priorNonSpeechProb += (int16_t)((PRIOR_UPDATE_Q14 * tmp16) >> 14); |
| |
| //final speech probability: combine prior model with LR factor: |
| /* default for every bin is 0; bins are only overwritten inside the loop below */ |
| memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen); |
| |
| if (inst->priorNonSpeechProb > 0) { |
| r0 = inst->priorNonSpeechProb; |
| r1 = 16384 - r0; |
| int32_t const_23637 = 23637; |
| int32_t const_44 = 44; |
| int32_t const_84 = 84; |
| int32_t const_1 = 1; |
| int32_t const_neg8 = -8; |
| for (i = 0; i < inst->magnLen; i++) { |
| r2 = inst->logLrtTimeAvgW32[i]; |
| if (r2 < 65300) { /* bins at or above this value keep the 0 written by memset */ |
| __asm __volatile( |
| ".set push \n\t" |
| ".set noreorder \n\t" |
| "mul %[r2], %[r2], %[const_23637] \n\t" |
| "sll %[r6], %[r1], 16 \n\t" /* r6 = (16384 - priorNonSpeechProb) << 16 */ |
| "clz %[r7], %[r6] \n\t" |
| "clo %[r8], %[r6] \n\t" |
| "slt %[r9], %[r6], $0 \n\t" |
| "movn %[r7], %[r8], %[r9] \n\t" /* r7 = number of leading bits of r6 equal to its sign bit */ |
| "sra %[r2], %[r2], 14 \n\t" |
| "andi %[r3], %[r2], 0xfff \n\t" /* r3 = low 12 bits (fraction) */ |
| "mul %[r4], %[r3], %[r3] \n\t" |
| "mul %[r3], %[r3], %[const_84] \n\t" |
| "sra %[r2], %[r2], 12 \n\t" /* r2 = integer part */ |
| "slt %[r5], %[r2], %[const_neg8] \n\t" |
| "movn %[r2], %[const_neg8], %[r5] \n\t" /* integer part clamped to >= -8 */ |
| "mul %[r4], %[r4], %[const_44] \n\t" |
| "sra %[r3], %[r3], 7 \n\t" |
| "addiu %[r7], %[r7], -1 \n\t" |
| "slti %[r9], %[r7], 31 \n\t" |
| "movz %[r7], $0, %[r9] \n\t" |
| "sra %[r4], %[r4], 19 \n\t" |
| "addu %[r4], %[r4], %[r3] \n\t" /* r4 = ((frac * frac * 44) >> 19) + ((frac * 84) >> 7) */ |
| "addiu %[r3], %[r2], 8 \n\t" |
| "addiu %[r2], %[r2], -4 \n\t" |
| "neg %[r5], %[r2] \n\t" |
| "sllv %[r6], %[r4], %[r2] \n\t" |
| "srav %[r5], %[r4], %[r5] \n\t" |
| "slt %[r2], %[r2], $0 \n\t" |
| "movn %[r6], %[r5], %[r2] \n\t" /* r6 = r4 << (intPart - 4), or r4 >> (4 - intPart) when intPart < 4 */ |
| "sllv %[r3], %[const_1], %[r3] \n\t" |
| "addu %[r2], %[r3], %[r6] \n\t" /* r2 = (1 << (intPart + 8)) + r6 */ |
| "clz %[r4], %[r2] \n\t" |
| "clo %[r5], %[r2] \n\t" |
| "slt %[r8], %[r2], $0 \n\t" |
| "movn %[r4], %[r5], %[r8] \n\t" |
| "addiu %[r4], %[r4], -1 \n\t" |
| "slt %[r5], $0, %[r2] \n\t" |
| "or %[r5], %[r5], %[r7] \n\t" |
| "movz %[r4], $0, %[r5] \n\t" |
| "addiu %[r6], %[r7], -7 \n\t" |
| "addu %[r6], %[r6], %[r4] \n\t" /* r6 = r7 + r4 - 7 */ |
| "bltz %[r6], 1f \n\t" /* r6 < 0: skip the division; r3 then still holds 1 << (intPart + 8) */ |
| " nop \n\t" |
| "addiu %[r4], %[r6], -8 \n\t" |
| "neg %[r3], %[r4] \n\t" |
| "srav %[r5], %[r2], %[r3] \n\t" |
| "mul %[r5], %[r5], %[r1] \n\t" |
| "mul %[r2], %[r2], %[r1] \n\t" |
| "slt %[r4], %[r4], $0 \n\t" |
| "srav %[r5], %[r5], %[r6] \n\t" |
| "sra %[r2], %[r2], 8 \n\t" |
| "movn %[r2], %[r5], %[r4] \n\t" /* r6 < 8 selects the pre-shifted product, otherwise the full product >> 8 */ |
| "sll %[r3], %[r0], 8 \n\t" |
| "addu %[r2], %[r0], %[r2] \n\t" |
| "divu %[r3], %[r3], %[r2] \n\t" /* r3 = (priorNonSpeechProb << 8) / (priorNonSpeechProb + r2) */ |
| "1: \n\t" |
| ".set pop \n\t" |
| : [r2] "+r" (r2), [r3] "=&r" (r3), [r4] "=&r" (r4), |
| [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7), |
| [r8] "=&r" (r8), [r9] "=&r" (r9) |
| : [r0] "r" (r0), [r1] "r" (r1), [const_23637] "r" (const_23637), |
| [const_neg8] "r" (const_neg8), [const_84] "r" (const_84), |
| [const_1] "r" (const_1), [const_44] "r" (const_44) |
| : "hi", "lo" |
| ); |
| nonSpeechProbFinal[i] = r3; /* implicit conversion keeps only the low 16 bits of r3 */ |
| } |
| } |
| } |
| } |
| |
| // Update analysis buffer for lower band, and window data before FFT: out[i] = round(window[i] * analysisBuffer[i] / 2^14) for i in [0, anaLen). |
| void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst, /* in/out: analysisBuffer is shifted and refilled; window, anaLen, blockLen10ms are read */ |
| int16_t* out, /* out: receives inst->anaLen windowed samples */ |
| int16_t* new_speech) { /* in: inst->blockLen10ms new samples appended to the analysis buffer */ |
| int iters, after; /* asm loop counters: unrolled iterations and leftover samples */ |
| int anaLen = (int)inst->anaLen; |
| int *window = (int*)inst->window; /* the DSP path loads two 16-bit samples per lw through these pointers; NOTE(review): that presumably needs 4-byte aligned buffers - confirm */ |
| int *anaBuf = (int*)inst->analysisBuffer; |
| int *outBuf = (int*)out; |
| int r0, r1, r2, r3, r4, r5, r6, r7; |
| #if defined(MIPS_DSP_R1_LE) |
| int r8; |
| #endif |
| |
| // For lower band update analysis buffer. |
| memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms, /* drop the oldest blockLen10ms samples; NOTE(review): memcpy assumes anaLen - blockLen10ms <= blockLen10ms (no overlap) - confirm */ |
| (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer)); |
| memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, new_speech, /* append the new block at the end */ |
| inst->blockLen10ms * sizeof(*inst->analysisBuffer)); |
| |
| // Window data before FFT. |
| #if defined(MIPS_DSP_R1_LE) |
| __asm __volatile( |
| ".set push \n\t" |
| ".set noreorder \n\t" |
| "sra %[iters], %[anaLen], 3 \n\t" /* main loop handles 8 samples (4 words) per iteration */ |
| "1: \n\t" |
| "blez %[iters], 2f \n\t" |
| " nop \n\t" |
| "lw %[r0], 0(%[window]) \n\t" |
| "lw %[r1], 0(%[anaBuf]) \n\t" |
| "lw %[r2], 4(%[window]) \n\t" |
| "lw %[r3], 4(%[anaBuf]) \n\t" |
| "lw %[r4], 8(%[window]) \n\t" |
| "lw %[r5], 8(%[anaBuf]) \n\t" |
| "lw %[r6], 12(%[window]) \n\t" |
| "lw %[r7], 12(%[anaBuf]) \n\t" |
| "muleq_s.w.phl %[r8], %[r0], %[r1] \n\t" /* products of the upper halfwords (phl) and lower halfwords (phr) of each word pair */ |
| "muleq_s.w.phr %[r0], %[r0], %[r1] \n\t" |
| "muleq_s.w.phl %[r1], %[r2], %[r3] \n\t" |
| "muleq_s.w.phr %[r2], %[r2], %[r3] \n\t" |
| "muleq_s.w.phl %[r3], %[r4], %[r5] \n\t" |
| "muleq_s.w.phr %[r4], %[r4], %[r5] \n\t" |
| "muleq_s.w.phl %[r5], %[r6], %[r7] \n\t" |
| "muleq_s.w.phr %[r6], %[r6], %[r7] \n\t" |
| #if defined(MIPS_DSP_R2_LE) |
| "precr_sra_r.ph.w %[r8], %[r0], 15 \n\t" /* DSP R2: shift both products right by 15 with rounding and pack them into one word */ |
| "precr_sra_r.ph.w %[r1], %[r2], 15 \n\t" |
| "precr_sra_r.ph.w %[r3], %[r4], 15 \n\t" |
| "precr_sra_r.ph.w %[r5], %[r6], 15 \n\t" |
| "sw %[r8], 0(%[outBuf]) \n\t" |
| "sw %[r1], 4(%[outBuf]) \n\t" |
| "sw %[r3], 8(%[outBuf]) \n\t" |
| "sw %[r5], 12(%[outBuf]) \n\t" |
| #else |
| "shra_r.w %[r8], %[r8], 15 \n\t" /* DSP R1 only: round-shift each product separately, then pack the pairs by hand */ |
| "shra_r.w %[r0], %[r0], 15 \n\t" |
| "shra_r.w %[r1], %[r1], 15 \n\t" |
| "shra_r.w %[r2], %[r2], 15 \n\t" |
| "shra_r.w %[r3], %[r3], 15 \n\t" |
| "shra_r.w %[r4], %[r4], 15 \n\t" |
| "shra_r.w %[r5], %[r5], 15 \n\t" |
| "shra_r.w %[r6], %[r6], 15 \n\t" |
| "sll %[r0], %[r0], 16 \n\t" |
| "sll %[r2], %[r2], 16 \n\t" |
| "sll %[r4], %[r4], 16 \n\t" |
| "sll %[r6], %[r6], 16 \n\t" |
| "packrl.ph %[r0], %[r8], %[r0] \n\t" |
| "packrl.ph %[r2], %[r1], %[r2] \n\t" |
| "packrl.ph %[r4], %[r3], %[r4] \n\t" |
| "packrl.ph %[r6], %[r5], %[r6] \n\t" |
| "sw %[r0], 0(%[outBuf]) \n\t" |
| "sw %[r2], 4(%[outBuf]) \n\t" |
| "sw %[r4], 8(%[outBuf]) \n\t" |
| "sw %[r6], 12(%[outBuf]) \n\t" |
| #endif |
| "addiu %[window], %[window], 16 \n\t" |
| "addiu %[anaBuf], %[anaBuf], 16 \n\t" |
| "addiu %[outBuf], %[outBuf], 16 \n\t" |
| "b 1b \n\t" |
| " addiu %[iters], %[iters], -1 \n\t" /* branch delay slot: decrement the loop counter */ |
| "2: \n\t" |
| "andi %[after], %[anaLen], 7 \n\t" /* leftover samples (anaLen mod 8), handled one at a time */ |
| "3: \n\t" |
| "blez %[after], 4f \n\t" |
| " nop \n\t" |
| "lh %[r0], 0(%[window]) \n\t" |
| "lh %[r1], 0(%[anaBuf]) \n\t" |
| "mul %[r0], %[r0], %[r1] \n\t" |
| "addiu %[window], %[window], 2 \n\t" |
| "addiu %[anaBuf], %[anaBuf], 2 \n\t" |
| "addiu %[outBuf], %[outBuf], 2 \n\t" |
| "shra_r.w %[r0], %[r0], 14 \n\t" /* plain product, so the rounding shift is 14 here */ |
| "sh %[r0], -2(%[outBuf]) \n\t" |
| "b 3b \n\t" |
| " addiu %[after], %[after], -1 \n\t" |
| "4: \n\t" |
| ".set pop \n\t" |
| : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), |
| [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), |
| [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8), |
| [iters] "=&r" (iters), [after] "=&r" (after), |
| [window] "+r" (window),[anaBuf] "+r" (anaBuf), |
| [outBuf] "+r" (outBuf) |
| : [anaLen] "r" (anaLen) |
| : "memory", "hi", "lo" |
| ); |
| #else |
| __asm __volatile( |
| ".set push \n\t" |
| ".set noreorder \n\t" |
| "sra %[iters], %[anaLen], 2 \n\t" /* main loop handles 4 samples per iteration */ |
| "1: \n\t" |
| "blez %[iters], 2f \n\t" |
| " nop \n\t" |
| "lh %[r0], 0(%[window]) \n\t" |
| "lh %[r1], 0(%[anaBuf]) \n\t" |
| "lh %[r2], 2(%[window]) \n\t" |
| "lh %[r3], 2(%[anaBuf]) \n\t" |
| "lh %[r4], 4(%[window]) \n\t" |
| "lh %[r5], 4(%[anaBuf]) \n\t" |
| "lh %[r6], 6(%[window]) \n\t" |
| "lh %[r7], 6(%[anaBuf]) \n\t" |
| "mul %[r0], %[r0], %[r1] \n\t" |
| "mul %[r2], %[r2], %[r3] \n\t" |
| "mul %[r4], %[r4], %[r5] \n\t" |
| "mul %[r6], %[r6], %[r7] \n\t" |
| "addiu %[window], %[window], 8 \n\t" |
| "addiu %[anaBuf], %[anaBuf], 8 \n\t" |
| "addiu %[r0], %[r0], 0x2000 \n\t" /* + 2^13: round to nearest before the shift by 14 */ |
| "addiu %[r2], %[r2], 0x2000 \n\t" |
| "addiu %[r4], %[r4], 0x2000 \n\t" |
| "addiu %[r6], %[r6], 0x2000 \n\t" |
| "sra %[r0], %[r0], 14 \n\t" |
| "sra %[r2], %[r2], 14 \n\t" |
| "sra %[r4], %[r4], 14 \n\t" |
| "sra %[r6], %[r6], 14 \n\t" |
| "sh %[r0], 0(%[outBuf]) \n\t" |
| "sh %[r2], 2(%[outBuf]) \n\t" |
| "sh %[r4], 4(%[outBuf]) \n\t" |
| "sh %[r6], 6(%[outBuf]) \n\t" |
| "addiu %[outBuf], %[outBuf], 8 \n\t" |
| "b 1b \n\t" |
| " addiu %[iters], %[iters], -1 \n\t" /* branch delay slot: decrement the loop counter */ |
| "2: \n\t" |
| "andi %[after], %[anaLen], 3 \n\t" /* leftover samples (anaLen mod 4), handled one at a time */ |
| "3: \n\t" |
| "blez %[after], 4f \n\t" |
| " nop \n\t" |
| "lh %[r0], 0(%[window]) \n\t" |
| "lh %[r1], 0(%[anaBuf]) \n\t" |
| "mul %[r0], %[r0], %[r1] \n\t" |
| "addiu %[window], %[window], 2 \n\t" |
| "addiu %[anaBuf], %[anaBuf], 2 \n\t" |
| "addiu %[outBuf], %[outBuf], 2 \n\t" |
| "addiu %[r0], %[r0], 0x2000 \n\t" |
| "sra %[r0], %[r0], 14 \n\t" |
| "sh %[r0], -2(%[outBuf]) \n\t" |
| "b 3b \n\t" |
| " addiu %[after], %[after], -1 \n\t" |
| "4: \n\t" |
| ".set pop \n\t" |
| : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), |
| [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), |
| [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "=&r" (iters), |
| [after] "=&r" (after), [window] "+r" (window), |
| [anaBuf] "+r" (anaBuf), [outBuf] "+r" (outBuf) |
| : [anaLen] "r" (anaLen) |
| : "memory", "hi", "lo" |
| ); |
| #endif |
| } |
| |
| // For the noise suppression process, synthesis, read out fully processed |
| // segment, and update synthesis buffer. |
| // |
| // For every i in [0, anaLen) the asm computes |
| //   t = (window[i] * real[i] + 0x2000) >> 14 |
| //   t = (t * gain_factor + 0x1000) >> 13, clamped to at most 0x7fff |
| //   synthesisBuffer[i] = clamp(synthesisBuffer[i] + t, -0x8000, 0x7fff) |
| // The first blockLen10ms results are additionally stored to out_frame. |
| // Afterwards the synthesis buffer is shifted left by blockLen10ms samples |
| // and its last blockLen10ms samples are zeroed. |
| // |
| // inst:        in/out, state (window, real, synthesisBuffer, anaLen, |
| //              blockLen10ms are used). |
| // out_frame:   out, receives blockLen10ms samples. |
| // gain_factor: in, multiplier applied before the rounded shift by 13. |
| void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst, |
| int16_t* out_frame, |
| int16_t gain_factor) { |
| int iters = (int)inst->blockLen10ms >> 2; // unrolled (x4) iterations, part 1 |
| int after = inst->blockLen10ms & 3; // leftover samples, part 1 |
| int r0, r1, r2, r3, r4, r5, r6, r7; |
| int16_t *window = (int16_t*)inst->window; |
| int16_t *real = inst->real; |
| int16_t *synthBuf = inst->synthesisBuffer; |
| int16_t *out = out_frame; |
| int sat_pos = 0x7fff; |
| int sat_neg = 0xffff8000; |
| int block10 = (int)inst->blockLen10ms; |
| int anaLen = (int)inst->anaLen; |
| |
| __asm __volatile( |
| ".set push \n\t" |
| ".set noreorder \n\t" |
| // Part 1: first blockLen10ms samples, 4 per iteration, stored to both |
| // synthBuf and out. |
| "1: \n\t" |
| "blez %[iters], 2f \n\t" |
| " nop \n\t" |
| "lh %[r0], 0(%[window]) \n\t" |
| "lh %[r1], 0(%[real]) \n\t" |
| "lh %[r2], 2(%[window]) \n\t" |
| "lh %[r3], 2(%[real]) \n\t" |
| "lh %[r4], 4(%[window]) \n\t" |
| "lh %[r5], 4(%[real]) \n\t" |
| "lh %[r6], 6(%[window]) \n\t" |
| "lh %[r7], 6(%[real]) \n\t" |
| "mul %[r0], %[r0], %[r1] \n\t" |
| "mul %[r2], %[r2], %[r3] \n\t" |
| "mul %[r4], %[r4], %[r5] \n\t" |
| "mul %[r6], %[r6], %[r7] \n\t" |
| "addiu %[r0], %[r0], 0x2000 \n\t" |
| "addiu %[r2], %[r2], 0x2000 \n\t" |
| "addiu %[r4], %[r4], 0x2000 \n\t" |
| "addiu %[r6], %[r6], 0x2000 \n\t" |
| "sra %[r0], %[r0], 14 \n\t" |
| "sra %[r2], %[r2], 14 \n\t" |
| "sra %[r4], %[r4], 14 \n\t" |
| "sra %[r6], %[r6], 14 \n\t" |
| "mul %[r0], %[r0], %[gain_factor] \n\t" |
| "mul %[r2], %[r2], %[gain_factor] \n\t" |
| "mul %[r4], %[r4], %[gain_factor] \n\t" |
| "mul %[r6], %[r6], %[gain_factor] \n\t" |
| "addiu %[r0], %[r0], 0x1000 \n\t" |
| "addiu %[r2], %[r2], 0x1000 \n\t" |
| "addiu %[r4], %[r4], 0x1000 \n\t" |
| "addiu %[r6], %[r6], 0x1000 \n\t" |
| "sra %[r0], %[r0], 13 \n\t" |
| "sra %[r2], %[r2], 13 \n\t" |
| "sra %[r4], %[r4], 13 \n\t" |
| "sra %[r6], %[r6], 13 \n\t" |
| // clamp the scaled value to at most sat_pos |
| "slt %[r1], %[r0], %[sat_pos] \n\t" |
| "slt %[r3], %[r2], %[sat_pos] \n\t" |
| "slt %[r5], %[r4], %[sat_pos] \n\t" |
| "slt %[r7], %[r6], %[sat_pos] \n\t" |
| "movz %[r0], %[sat_pos], %[r1] \n\t" |
| "movz %[r2], %[sat_pos], %[r3] \n\t" |
| "movz %[r4], %[sat_pos], %[r5] \n\t" |
| "movz %[r6], %[sat_pos], %[r7] \n\t" |
| "lh %[r1], 0(%[synthBuf]) \n\t" |
| "lh %[r3], 2(%[synthBuf]) \n\t" |
| "lh %[r5], 4(%[synthBuf]) \n\t" |
| "lh %[r7], 6(%[synthBuf]) \n\t" |
| "addu %[r0], %[r0], %[r1] \n\t" |
| "addu %[r2], %[r2], %[r3] \n\t" |
| "addu %[r4], %[r4], %[r5] \n\t" |
| "addu %[r6], %[r6], %[r7] \n\t" |
| // clamp the sum to [sat_neg, sat_pos] |
| "slt %[r1], %[r0], %[sat_pos] \n\t" |
| "slt %[r3], %[r2], %[sat_pos] \n\t" |
| "slt %[r5], %[r4], %[sat_pos] \n\t" |
| "slt %[r7], %[r6], %[sat_pos] \n\t" |
| "movz %[r0], %[sat_pos], %[r1] \n\t" |
| "movz %[r2], %[sat_pos], %[r3] \n\t" |
| "movz %[r4], %[sat_pos], %[r5] \n\t" |
| "movz %[r6], %[sat_pos], %[r7] \n\t" |
| "slt %[r1], %[r0], %[sat_neg] \n\t" |
| "slt %[r3], %[r2], %[sat_neg] \n\t" |
| "slt %[r5], %[r4], %[sat_neg] \n\t" |
| "slt %[r7], %[r6], %[sat_neg] \n\t" |
| "movn %[r0], %[sat_neg], %[r1] \n\t" |
| "movn %[r2], %[sat_neg], %[r3] \n\t" |
| "movn %[r4], %[sat_neg], %[r5] \n\t" |
| "movn %[r6], %[sat_neg], %[r7] \n\t" |
| "sh %[r0], 0(%[synthBuf]) \n\t" |
| "sh %[r2], 2(%[synthBuf]) \n\t" |
| "sh %[r4], 4(%[synthBuf]) \n\t" |
| "sh %[r6], 6(%[synthBuf]) \n\t" |
| "sh %[r0], 0(%[out]) \n\t" |
| "sh %[r2], 2(%[out]) \n\t" |
| "sh %[r4], 4(%[out]) \n\t" |
| "sh %[r6], 6(%[out]) \n\t" |
| "addiu %[window], %[window], 8 \n\t" |
| "addiu %[real], %[real], 8 \n\t" |
| "addiu %[synthBuf],%[synthBuf], 8 \n\t" |
| "addiu %[out], %[out], 8 \n\t" |
| "b 1b \n\t" |
| " addiu %[iters], %[iters], -1 \n\t" |
| // Part 1 remainder: blockLen10ms & 3 samples, one per iteration. |
| // The delay slot is a nop: this branch is re-executed on every |
| // remainder iteration, so it must not carry work meant to run once. |
| "2: \n\t" |
| "blez %[after], 3f \n\t" |
| " nop \n\t" |
| "lh %[r0], 0(%[window]) \n\t" |
| "lh %[r1], 0(%[real]) \n\t" |
| "mul %[r0], %[r0], %[r1] \n\t" |
| "addiu %[window], %[window], 2 \n\t" |
| "addiu %[real], %[real], 2 \n\t" |
| "addiu %[r0], %[r0], 0x2000 \n\t" |
| "sra %[r0], %[r0], 14 \n\t" |
| "mul %[r0], %[r0], %[gain_factor] \n\t" |
| "addiu %[r0], %[r0], 0x1000 \n\t" |
| "sra %[r0], %[r0], 13 \n\t" |
| "slt %[r1], %[r0], %[sat_pos] \n\t" |
| "movz %[r0], %[sat_pos], %[r1] \n\t" |
| "lh %[r1], 0(%[synthBuf]) \n\t" |
| "addu %[r0], %[r0], %[r1] \n\t" |
| "slt %[r1], %[r0], %[sat_pos] \n\t" |
| "movz %[r0], %[sat_pos], %[r1] \n\t" |
| "slt %[r1], %[r0], %[sat_neg] \n\t" |
| "movn %[r0], %[sat_neg], %[r1] \n\t" |
| "sh %[r0], 0(%[synthBuf]) \n\t" |
| "sh %[r0], 0(%[out]) \n\t" |
| "addiu %[synthBuf],%[synthBuf], 2 \n\t" |
| "addiu %[out], %[out], 2 \n\t" |
| "b 2b \n\t" |
| " addiu %[after], %[after], -1 \n\t" |
| // Part 2: remaining anaLen - blockLen10ms samples, stored to synthBuf |
| // only. block10 is turned into that remaining length exactly once here. |
| "3: \n\t" |
| "subu %[block10], %[anaLen], %[block10] \n\t" |
| "sra %[iters], %[block10], 2 \n\t" |
| "4: \n\t" |
| "blez %[iters], 5f \n\t" |
| " andi %[after], %[block10], 3 \n\t" |
| "lh %[r0], 0(%[window]) \n\t" |
| "lh %[r1], 0(%[real]) \n\t" |
| "lh %[r2], 2(%[window]) \n\t" |
| "lh %[r3], 2(%[real]) \n\t" |
| "lh %[r4], 4(%[window]) \n\t" |
| "lh %[r5], 4(%[real]) \n\t" |
| "lh %[r6], 6(%[window]) \n\t" |
| "lh %[r7], 6(%[real]) \n\t" |
| "mul %[r0], %[r0], %[r1] \n\t" |
| "mul %[r2], %[r2], %[r3] \n\t" |
| "mul %[r4], %[r4], %[r5] \n\t" |
| "mul %[r6], %[r6], %[r7] \n\t" |
| "addiu %[r0], %[r0], 0x2000 \n\t" |
| "addiu %[r2], %[r2], 0x2000 \n\t" |
| "addiu %[r4], %[r4], 0x2000 \n\t" |
| "addiu %[r6], %[r6], 0x2000 \n\t" |
| "sra %[r0], %[r0], 14 \n\t" |
| "sra %[r2], %[r2], 14 \n\t" |
| "sra %[r4], %[r4], 14 \n\t" |
| "sra %[r6], %[r6], 14 \n\t" |
| "mul %[r0], %[r0], %[gain_factor] \n\t" |
| "mul %[r2], %[r2], %[gain_factor] \n\t" |
| "mul %[r4], %[r4], %[gain_factor] \n\t" |
| "mul %[r6], %[r6], %[gain_factor] \n\t" |
| "addiu %[r0], %[r0], 0x1000 \n\t" |
| "addiu %[r2], %[r2], 0x1000 \n\t" |
| "addiu %[r4], %[r4], 0x1000 \n\t" |
| "addiu %[r6], %[r6], 0x1000 \n\t" |
| "sra %[r0], %[r0], 13 \n\t" |
| "sra %[r2], %[r2], 13 \n\t" |
| "sra %[r4], %[r4], 13 \n\t" |
| "sra %[r6], %[r6], 13 \n\t" |
| "slt %[r1], %[r0], %[sat_pos] \n\t" |
| "slt %[r3], %[r2], %[sat_pos] \n\t" |
| "slt %[r5], %[r4], %[sat_pos] \n\t" |
| "slt %[r7], %[r6], %[sat_pos] \n\t" |
| "movz %[r0], %[sat_pos], %[r1] \n\t" |
| "movz %[r2], %[sat_pos], %[r3] \n\t" |
| "movz %[r4], %[sat_pos], %[r5] \n\t" |
| "movz %[r6], %[sat_pos], %[r7] \n\t" |
| "lh %[r1], 0(%[synthBuf]) \n\t" |
| "lh %[r3], 2(%[synthBuf]) \n\t" |
| "lh %[r5], 4(%[synthBuf]) \n\t" |
| "lh %[r7], 6(%[synthBuf]) \n\t" |
| "addu %[r0], %[r0], %[r1] \n\t" |
| "addu %[r2], %[r2], %[r3] \n\t" |
| "addu %[r4], %[r4], %[r5] \n\t" |
| "addu %[r6], %[r6], %[r7] \n\t" |
| "slt %[r1], %[r0], %[sat_pos] \n\t" |
| "slt %[r3], %[r2], %[sat_pos] \n\t" |
| "slt %[r5], %[r4], %[sat_pos] \n\t" |
| "slt %[r7], %[r6], %[sat_pos] \n\t" |
| "movz %[r0], %[sat_pos], %[r1] \n\t" |
| "movz %[r2], %[sat_pos], %[r3] \n\t" |
| "movz %[r4], %[sat_pos], %[r5] \n\t" |
| "movz %[r6], %[sat_pos], %[r7] \n\t" |
| "slt %[r1], %[r0], %[sat_neg] \n\t" |
| "slt %[r3], %[r2], %[sat_neg] \n\t" |
| "slt %[r5], %[r4], %[sat_neg] \n\t" |
| "slt %[r7], %[r6], %[sat_neg] \n\t" |
| "movn %[r0], %[sat_neg], %[r1] \n\t" |
| "movn %[r2], %[sat_neg], %[r3] \n\t" |
| "movn %[r4], %[sat_neg], %[r5] \n\t" |
| "movn %[r6], %[sat_neg], %[r7] \n\t" |
| "sh %[r0], 0(%[synthBuf]) \n\t" |
| "sh %[r2], 2(%[synthBuf]) \n\t" |
| "sh %[r4], 4(%[synthBuf]) \n\t" |
| "sh %[r6], 6(%[synthBuf]) \n\t" |
| "addiu %[window], %[window], 8 \n\t" |
| "addiu %[real], %[real], 8 \n\t" |
| "addiu %[synthBuf],%[synthBuf], 8 \n\t" |
| "b 4b \n\t" |
| " addiu %[iters], %[iters], -1 \n\t" |
| // Part 2 remainder: (anaLen - blockLen10ms) & 3 samples, one per |
| // iteration. The loop must return to its own head (5), not to the |
| // part 1 remainder loop, which would also store to out. |
| "5: \n\t" |
| "blez %[after], 6f \n\t" |
| " nop \n\t" |
| "lh %[r0], 0(%[window]) \n\t" |
| "lh %[r1], 0(%[real]) \n\t" |
| "mul %[r0], %[r0], %[r1] \n\t" |
| "addiu %[window], %[window], 2 \n\t" |
| "addiu %[real], %[real], 2 \n\t" |
| "addiu %[r0], %[r0], 0x2000 \n\t" |
| "sra %[r0], %[r0], 14 \n\t" |
| "mul %[r0], %[r0], %[gain_factor] \n\t" |
| "addiu %[r0], %[r0], 0x1000 \n\t" |
| "sra %[r0], %[r0], 13 \n\t" |
| "slt %[r1], %[r0], %[sat_pos] \n\t" |
| "movz %[r0], %[sat_pos], %[r1] \n\t" |
| "lh %[r1], 0(%[synthBuf]) \n\t" |
| "addu %[r0], %[r0], %[r1] \n\t" |
| "slt %[r1], %[r0], %[sat_pos] \n\t" |
| "movz %[r0], %[sat_pos], %[r1] \n\t" |
| "slt %[r1], %[r0], %[sat_neg] \n\t" |
| "movn %[r0], %[sat_neg], %[r1] \n\t" |
| "sh %[r0], 0(%[synthBuf]) \n\t" |
| "addiu %[synthBuf],%[synthBuf], 2 \n\t" |
| "b 5b \n\t" |
| " addiu %[after], %[after], -1 \n\t" |
| "6: \n\t" |
| ".set pop \n\t" |
| : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), |
| [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), |
| [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "+r" (iters), |
| [after] "+r" (after), [block10] "+r" (block10), |
| [window] "+r" (window), [real] "+r" (real), |
| [synthBuf] "+r" (synthBuf), [out] "+r" (out) |
| : [gain_factor] "r" (gain_factor), [sat_pos] "r" (sat_pos), |
| [sat_neg] "r" (sat_neg), [anaLen] "r" (anaLen) |
| : "memory", "hi", "lo" |
| ); |
| |
| // update synthesis buffer |
| // NOTE(review): memcpy requires the two regions not to overlap, i.e. |
| // anaLen - blockLen10ms <= blockLen10ms - confirm for all configurations. |
| memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms, |
| (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer)); |
| WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer |
| + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms); |
| } |
| |
| // Filter the data in the frequency domain, and create spectrum: real/imag of each bin are multiplied by noiseSupFilter and shifted right by 14, stored back in place, and copied to freq_buf as (real, -imag) pairs from the front and (real, imag) pairs from the back. |
| void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst, /* in/out: real[] and imag[] are overwritten; noiseSupFilter, anaLen, anaLen2 are read */ |
| int16_t* freq_buf) { /* out: interleaved spectrum; the highest halfword index written is 2 * anaLen - 1 */ |
| uint16_t *noiseSupFilter = inst->noiseSupFilter; /* loaded with lh in the asm, i.e. the 16-bit gain is sign-extended */ |
| int16_t *real = inst->real; |
| int16_t *imag = inst->imag; |
| int32_t loop_count = 2; /* advanced by 2 per loop pass and compared against inst->anaLen2 */ |
| int16_t tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6; |
| int16_t tmp16 = (int16_t)(inst->anaLen << 1) - 4; |
| int16_t* freq_buf_f = freq_buf; /* forward write cursor */ |
| int16_t* freq_buf_s = &freq_buf[tmp16]; /* backward write cursor, starts 4 halfwords before index 2 * anaLen */ |
| |
| __asm __volatile ( |
| ".set push \n\t" |
| ".set noreorder \n\t" |
| //first sample |
| "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t" |
| "lh %[tmp_2], 0(%[real]) \n\t" |
| "lh %[tmp_3], 0(%[imag]) \n\t" |
| "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t" |
| "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t" |
| "sra %[tmp_2], %[tmp_2], 14 \n\t" |
| "sra %[tmp_3], %[tmp_3], 14 \n\t" |
| "sh %[tmp_2], 0(%[real]) \n\t" |
| "sh %[tmp_3], 0(%[imag]) \n\t" |
| "negu %[tmp_3], %[tmp_3] \n\t" /* the forward half of freq_buf stores the negated imaginary part */ |
| "sh %[tmp_2], 0(%[freq_buf_f]) \n\t" |
| "sh %[tmp_3], 2(%[freq_buf_f]) \n\t" |
| "addiu %[real], %[real], 2 \n\t" |
| "addiu %[imag], %[imag], 2 \n\t" |
| "addiu %[noiseSupFilter], %[noiseSupFilter], 2 \n\t" |
| "addiu %[freq_buf_f], %[freq_buf_f], 4 \n\t" |
| "1: \n\t" /* main loop: two bins per pass; the body always runs at least once */ |
| "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t" |
| "lh %[tmp_2], 0(%[real]) \n\t" |
| "lh %[tmp_3], 0(%[imag]) \n\t" |
| "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t" |
| "lh %[tmp_5], 2(%[real]) \n\t" |
| "lh %[tmp_6], 2(%[imag]) \n\t" |
| "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t" |
| "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t" |
| "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t" |
| "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t" |
| "addiu %[loop_count], %[loop_count], 2 \n\t" |
| "sra %[tmp_2], %[tmp_2], 14 \n\t" |
| "sra %[tmp_3], %[tmp_3], 14 \n\t" |
| "sra %[tmp_5], %[tmp_5], 14 \n\t" |
| "sra %[tmp_6], %[tmp_6], 14 \n\t" |
| "addiu %[noiseSupFilter], %[noiseSupFilter], 4 \n\t" |
| "sh %[tmp_2], 0(%[real]) \n\t" |
| "sh %[tmp_2], 4(%[freq_buf_s]) \n\t" /* backward half: first bin of the pair goes to the higher address, imaginary parts are stored before negation */ |
| "sh %[tmp_3], 0(%[imag]) \n\t" |
| "sh %[tmp_3], 6(%[freq_buf_s]) \n\t" |
| "negu %[tmp_3], %[tmp_3] \n\t" |
| "sh %[tmp_5], 2(%[real]) \n\t" |
| "sh %[tmp_5], 0(%[freq_buf_s]) \n\t" |
| "sh %[tmp_6], 2(%[imag]) \n\t" |
| "sh %[tmp_6], 2(%[freq_buf_s]) \n\t" |
| "negu %[tmp_6], %[tmp_6] \n\t" |
| "addiu %[freq_buf_s], %[freq_buf_s], -8 \n\t" |
| "addiu %[real], %[real], 4 \n\t" |
| "addiu %[imag], %[imag], 4 \n\t" |
| "sh %[tmp_2], 0(%[freq_buf_f]) \n\t" |
| "sh %[tmp_3], 2(%[freq_buf_f]) \n\t" |
| "sh %[tmp_5], 4(%[freq_buf_f]) \n\t" |
| "sh %[tmp_6], 6(%[freq_buf_f]) \n\t" |
| "blt %[loop_count], %[loop_size], 1b \n\t" |
| " addiu %[freq_buf_f], %[freq_buf_f], 8 \n\t" /* branch delay slot: advance the forward cursor */ |
| //last two samples: |
| "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t" |
| "lh %[tmp_2], 0(%[real]) \n\t" |
| "lh %[tmp_3], 0(%[imag]) \n\t" |
| "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t" |
| "lh %[tmp_5], 2(%[real]) \n\t" |
| "lh %[tmp_6], 2(%[imag]) \n\t" |
| "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t" |
| "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t" |
| "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t" |
| "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t" |
| "sra %[tmp_2], %[tmp_2], 14 \n\t" |
| "sra %[tmp_3], %[tmp_3], 14 \n\t" |
| "sra %[tmp_5], %[tmp_5], 14 \n\t" |
| "sra %[tmp_6], %[tmp_6], 14 \n\t" |
| "sh %[tmp_2], 0(%[real]) \n\t" |
| "sh %[tmp_2], 4(%[freq_buf_s]) \n\t" |
| "sh %[tmp_3], 0(%[imag]) \n\t" |
| "sh %[tmp_3], 6(%[freq_buf_s]) \n\t" |
| "negu %[tmp_3], %[tmp_3] \n\t" |
| "sh %[tmp_2], 0(%[freq_buf_f]) \n\t" |
| "sh %[tmp_3], 2(%[freq_buf_f]) \n\t" |
| "sh %[tmp_5], 4(%[freq_buf_f]) \n\t" |
| "sh %[tmp_6], 6(%[freq_buf_f]) \n\t" /* NOTE(review): the imaginary part of the final bin is stored without negation here - confirm it is always zero */ |
| "sh %[tmp_5], 2(%[real]) \n\t" |
| "sh %[tmp_6], 2(%[imag]) \n\t" |
| ".set pop \n\t" |
| : [real] "+r" (real), [imag] "+r" (imag), |
| [freq_buf_f] "+r" (freq_buf_f), [freq_buf_s] "+r" (freq_buf_s), |
| [loop_count] "+r" (loop_count), [noiseSupFilter] "+r" (noiseSupFilter), |
| [tmp_1] "=&r" (tmp_1), [tmp_2] "=&r" (tmp_2), [tmp_3] "=&r" (tmp_3), |
| [tmp_4] "=&r" (tmp_4), [tmp_5] "=&r" (tmp_5), [tmp_6] "=&r" (tmp_6) |
| : [loop_size] "r" (inst->anaLen2) |
| : "memory", "hi", "lo" |
| ); |
| } |
| |
| #if defined(MIPS_DSP_R1_LE) |
| // Denormalize the real-valued signal |in|, the output from inverse FFT. |
| // |
| // Writes inst->anaLen samples to inst->real. With |
| // shift = factor - inst->normData, each sample is shifted left by shift with |
| // saturation (shllv_s.ph) when shift >= 0, otherwise shifted right |
| // arithmetically by -shift. |
| // |
| // inst:   in/out, state (anaLen and normData are read, real[] is written). |
| // in:     in, inst->anaLen samples. |
| // factor: in, combined with inst->normData to form the shift amount. |
| void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst, |
| int16_t* in, |
| int factor) { |
| int32_t r0, r1, r2, r3, t0; |
| int len = (int)inst->anaLen; |
| int16_t *out = &inst->real[0]; |
| int shift = factor - inst->normData; |
| |
| // len, shift, in and out are all modified by the assembly below, so they |
| // are declared as read-write ("+r") operands; the compiler assumes that |
| // input-only operands still hold their original values afterwards. |
| __asm __volatile ( |
| ".set push \n\t" |
| ".set noreorder \n\t" |
| "beqz %[len], 8f \n\t" |
| " nop \n\t" |
| "bltz %[shift], 4f \n\t" |
| " sra %[t0], %[len], 2 \n\t" |
| "beqz %[t0], 2f \n\t" |
| " andi %[len], %[len], 3 \n\t" |
| // shift >= 0, four samples per iteration |
| "1: \n\t" |
| "lh %[r0], 0(%[in]) \n\t" |
| "lh %[r1], 2(%[in]) \n\t" |
| "lh %[r2], 4(%[in]) \n\t" |
| "lh %[r3], 6(%[in]) \n\t" |
| "shllv_s.ph %[r0], %[r0], %[shift] \n\t" |
| "shllv_s.ph %[r1], %[r1], %[shift] \n\t" |
| "shllv_s.ph %[r2], %[r2], %[shift] \n\t" |
| "shllv_s.ph %[r3], %[r3], %[shift] \n\t" |
| "addiu %[in], %[in], 8 \n\t" |
| "addiu %[t0], %[t0], -1 \n\t" |
| "sh %[r0], 0(%[out]) \n\t" |
| "sh %[r1], 2(%[out]) \n\t" |
| "sh %[r2], 4(%[out]) \n\t" |
| "sh %[r3], 6(%[out]) \n\t" |
| "bgtz %[t0], 1b \n\t" |
| " addiu %[out], %[out], 8 \n\t" |
| // shift >= 0, leftover samples (anaLen & 3) |
| "2: \n\t" |
| "beqz %[len], 8f \n\t" |
| " nop \n\t" |
| "3: \n\t" |
| "lh %[r0], 0(%[in]) \n\t" |
| "addiu %[in], %[in], 2 \n\t" |
| "addiu %[len], %[len], -1 \n\t" |
| "shllv_s.ph %[r0], %[r0], %[shift] \n\t" |
| "addiu %[out], %[out], 2 \n\t" |
| "bgtz %[len], 3b \n\t" |
| " sh %[r0], -2(%[out]) \n\t" |
| "b 8f \n\t" |
| " nop \n\t" |
| // shift < 0: negate it and shift right instead |
| "4: \n\t" |
| "negu %[shift], %[shift] \n\t" |
| "beqz %[t0], 6f \n\t" |
| " andi %[len], %[len], 3 \n\t" |
| "5: \n\t" |
| "lh %[r0], 0(%[in]) \n\t" |
| "lh %[r1], 2(%[in]) \n\t" |
| "lh %[r2], 4(%[in]) \n\t" |
| "lh %[r3], 6(%[in]) \n\t" |
| "srav %[r0], %[r0], %[shift] \n\t" |
| "srav %[r1], %[r1], %[shift] \n\t" |
| "srav %[r2], %[r2], %[shift] \n\t" |
| "srav %[r3], %[r3], %[shift] \n\t" |
| "addiu %[in], %[in], 8 \n\t" |
| "addiu %[t0], %[t0], -1 \n\t" |
| "sh %[r0], 0(%[out]) \n\t" |
| "sh %[r1], 2(%[out]) \n\t" |
| "sh %[r2], 4(%[out]) \n\t" |
| "sh %[r3], 6(%[out]) \n\t" |
| "bgtz %[t0], 5b \n\t" |
| " addiu %[out], %[out], 8 \n\t" |
| "6: \n\t" |
| "beqz %[len], 8f \n\t" |
| " nop \n\t" |
| "7: \n\t" |
| "lh %[r0], 0(%[in]) \n\t" |
| "addiu %[in], %[in], 2 \n\t" |
| "addiu %[len], %[len], -1 \n\t" |
| "srav %[r0], %[r0], %[shift] \n\t" |
| "addiu %[out], %[out], 2 \n\t" |
| "bgtz %[len], 7b \n\t" |
| " sh %[r0], -2(%[out]) \n\t" |
| "8: \n\t" |
| ".set pop \n\t" |
| : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1), |
| [r2] "=&r" (r2), [r3] "=&r" (r3), [len] "+r" (len), |
| [shift] "+r" (shift), [in] "+r" (in), [out] "+r" (out) |
| : /* no input-only operands */ |
| : "memory" |
| ); |
| } |
| #endif |
| |
| // Normalize the real-valued signal |in|, the input to forward FFT. |
| // |
| // Stores the low 16 bits of in[i] << inst->normData to out[i] for every i in |
| // [0, inst->anaLen). |
| // |
| // inst: in, state (anaLen and normData are read). |
| // in:   in, inst->anaLen samples. |
| // out:  out, inst->anaLen samples. |
| void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst, |
| const int16_t* in, |
| int16_t* out) { |
| int32_t r0, r1, r2, r3, t0; |
| int len = (int)inst->anaLen; |
| int shift = inst->normData; |
| |
| // len, in and out are modified by the assembly below, so they are declared |
| // as read-write ("+r") operands; the compiler assumes that input-only |
| // operands still hold their original values afterwards. shift is only read. |
| __asm __volatile ( |
| ".set push \n\t" |
| ".set noreorder \n\t" |
| "beqz %[len], 4f \n\t" |
| " sra %[t0], %[len], 2 \n\t" |
| "beqz %[t0], 2f \n\t" |
| " andi %[len], %[len], 3 \n\t" |
| // four samples per iteration |
| "1: \n\t" |
| "lh %[r0], 0(%[in]) \n\t" |
| "lh %[r1], 2(%[in]) \n\t" |
| "lh %[r2], 4(%[in]) \n\t" |
| "lh %[r3], 6(%[in]) \n\t" |
| "sllv %[r0], %[r0], %[shift] \n\t" |
| "sllv %[r1], %[r1], %[shift] \n\t" |
| "sllv %[r2], %[r2], %[shift] \n\t" |
| "sllv %[r3], %[r3], %[shift] \n\t" |
| "addiu %[in], %[in], 8 \n\t" |
| "addiu %[t0], %[t0], -1 \n\t" |
| "sh %[r0], 0(%[out]) \n\t" |
| "sh %[r1], 2(%[out]) \n\t" |
| "sh %[r2], 4(%[out]) \n\t" |
| "sh %[r3], 6(%[out]) \n\t" |
| "bgtz %[t0], 1b \n\t" |
| " addiu %[out], %[out], 8 \n\t" |
| // leftover samples (anaLen & 3) |
| "2: \n\t" |
| "beqz %[len], 4f \n\t" |
| " nop \n\t" |
| "3: \n\t" |
| "lh %[r0], 0(%[in]) \n\t" |
| "addiu %[in], %[in], 2 \n\t" |
| "addiu %[len], %[len], -1 \n\t" |
| "sllv %[r0], %[r0], %[shift] \n\t" |
| "addiu %[out], %[out], 2 \n\t" |
| "bgtz %[len], 3b \n\t" |
| " sh %[r0], -2(%[out]) \n\t" |
| "4: \n\t" |
| ".set pop \n\t" |
| : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1), |
| [r2] "=&r" (r2), [r3] "=&r" (r3), [len] "+r" (len), |
| [in] "+r" (in), [out] "+r" (out) |
| : [shift] "r" (shift) |
| : "memory" |
| ); |
| } |
| |