blob: ba7c4cfcff84e2a9d57d718c5bf0b9a9344ec177 [file] [log] [blame]
diff --git a/third_party/pffft/src/pffft.c b/third_party/pffft/src/pffft.c
index 776f564aa28c..643836626c0f 100644
--- a/third_party/pffft/src/pffft.c
+++ b/third_party/pffft/src/pffft.c
@@ -59,7 +59,6 @@
#include "pffft.h"
#include <stdlib.h>
-// #include <stdio.h>
#include <math.h>
#include <assert.h>
@@ -75,11 +74,14 @@
# define NEVER_INLINE(return_type) return_type __attribute__ ((noinline))
# define RESTRICT __restrict
# define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ varname__[size__];
+# define VLA_ARRAY_ON_STACK_FREE(varname__)
#elif defined(COMPILER_MSVC)
+#include <malloc.h>
# define ALWAYS_INLINE(return_type) __forceinline return_type
# define NEVER_INLINE(return_type) __declspec(noinline) return_type
# define RESTRICT __restrict
-# define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca(size__ * sizeof(type__))
+# define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_malloca(size__ * sizeof(type__))
+# define VLA_ARRAY_ON_STACK_FREE(varname__) _freea(varname__)
#endif
@@ -219,35 +221,24 @@ void validate_pffft_simd() {
memcpy(a3.f, f+12, 4*sizeof(float));
t = a0; u = a1; t.v = VZERO();
- // printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
assertv4(t, 0, 0, 0, 0);
t.v = VADD(a1.v, a2.v);
- // printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
assertv4(t, 12, 14, 16, 18);
t.v = VMUL(a1.v, a2.v);
- // printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
assertv4(t, 32, 45, 60, 77);
t.v = VMADD(a1.v, a2.v,a0.v);
- // printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
assertv4(t, 32, 46, 62, 80);
INTERLEAVE2(a1.v,a2.v,t.v,u.v);
- // printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11);
UNINTERLEAVE2(a1.v,a2.v,t.v,u.v);
- // printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
assertv4(t, 4, 6, 8, 10); assertv4(u, 5, 7, 9, 11);
t.v=LD_PS1(f[15]);
- // printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
assertv4(t, 15, 15, 15, 15);
t.v = VSWAPHL(a1.v, a2.v);
- // printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
assertv4(t, 8, 9, 6, 7);
VTRANSPOSE4(a0.v, a1.v, a2.v, a3.v);
- // printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n",
- // a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3],
- // a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]);
assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15);
}
#endif //!PFFFT_SIMD_DISABLE
@@ -1674,6 +1665,8 @@ void pffft_transform_internal(PFFFT_Setup *setup, const float *finput, float *fo
ib = !ib;
}
assert(buff[ib] == voutput);
+
+ VLA_ARRAY_ON_STACK_FREE(scratch_on_stack);
}
void pffft_zconvolve_accumulate(PFFFT_Setup *s, const float *a, const float *b, float *ab, float scaling) {
@@ -1851,6 +1844,8 @@ void pffft_transform_internal_nosimd(PFFFT_Setup *setup, const float *input, flo
ib = !ib;
}
assert(buff[ib] == output);
+
+ VLA_ARRAY_ON_STACK_FREE(scratch_on_stack);
}
#define pffft_zconvolve_accumulate_nosimd pffft_zconvolve_accumulate