| commit abeeae570efff38dceccf68f5352809c58ffdda2 |
| Author: Phoebe Wang <phoebe.wang@intel.com> |
| Date: Thu Jun 30 16:40:29 2022 +0800 |
| |
| [X86] Support `_Float16` on SSE2 and up |
| |
| This change is split out of D113107 to address issue #56204 and the discussion at https://discourse.llvm.org/t/how-to-build-compiler-rt-for-new-x86-half-float-abi/63366 |
| |
| Reviewed By: zahiraam, rjmccall, bkramer, MaskRay |
| |
| Differential Revision: https://reviews.llvm.org/D128571 |
| |
| diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst |
| index af697fafd8c4..1bac2aee84bd 100644 |
| --- a/clang/docs/LanguageExtensions.rst |
| +++ b/clang/docs/LanguageExtensions.rst |
| @@ -743,7 +743,13 @@ targets pending ABI standardization: |
| * 64-bit ARM (AArch64) |
| * AMDGPU |
| * SPIR |
| -* X86 (Only available under feature AVX512-FP16) |
| +* X86 (see below) |
| + |
| +On X86 targets, ``_Float16`` is supported as long as SSE2 is available, which |
| +includes all 64-bit and all recent 32-bit processors. When the target supports |
| +AVX512-FP16, ``_Float16`` arithmetic is performed using that native support. |
| +Otherwise, ``_Float16`` arithmetic is performed by promoting to ``float``, |
| +performing the operation, and then truncating to ``_Float16``. |
| |
| ``_Float16`` will be supported on more targets as they define ABIs for it. |
| |
| diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst |
| index 99288d0ffac4..b80e401e02f6 100644 |
| --- a/clang/docs/ReleaseNotes.rst |
| +++ b/clang/docs/ReleaseNotes.rst |
| @@ -516,6 +516,9 @@ X86 Support in Clang |
| |
| - Support ``-mharden-sls=[none|all|return|indirect-jmp]`` for straight-line |
| speculation hardening. |
| +- Support for the ``_Float16`` type has been added for all targets with SSE2. |
| + When AVX512-FP16 is not available, arithmetic on ``_Float16`` is emulated |
| + using ``float``. |
| |
| DWARF Support in Clang |
| ---------------------- |
| diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp |
| index b83b3517ddf9..06988830eaed 100644 |
| --- a/clang/lib/Basic/Targets/X86.cpp |
| +++ b/clang/lib/Basic/Targets/X86.cpp |
| @@ -239,7 +239,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, |
| HasAVX512ER = true; |
| } else if (Feature == "+avx512fp16") { |
| HasAVX512FP16 = true; |
| - HasFloat16 = true; |
| } else if (Feature == "+avx512pf") { |
| HasAVX512PF = true; |
| } else if (Feature == "+avx512dq") { |
| @@ -355,6 +354,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, |
| .Default(NoSSE); |
| SSELevel = std::max(SSELevel, Level); |
| |
| + HasFloat16 = SSELevel >= SSE2; |
| + |
| MMX3DNowEnum ThreeDNowLevel = llvm::StringSwitch<MMX3DNowEnum>(Feature) |
| .Case("+3dnowa", AMD3DNowAthlon) |
| .Case("+3dnow", AMD3DNow) |
| diff --git a/clang/test/CodeGen/X86/Float16-arithmetic.c b/clang/test/CodeGen/X86/Float16-arithmetic.c |
| new file mode 100644 |
| index 000000000000..aa61f7cb3c65 |
| --- /dev/null |
| +++ b/clang/test/CodeGen/X86/Float16-arithmetic.c |
| @@ -0,0 +1,112 @@ |
| +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py |
| +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s |
| + |
| + |
| +// CHECK-LABEL: @add1( |
| +// CHECK-NEXT: entry: |
| +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 |
| +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 |
| +// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 |
| +// CHECK-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 |
| +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 |
| +// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 |
| +// CHECK-NEXT: [[ADD:%.*]] = fadd half [[TMP0]], [[TMP1]] |
| +// CHECK-NEXT: ret half [[ADD]] |
| +// |
| +_Float16 add1(_Float16 a, _Float16 b) { |
| + return a + b; |
| +} |
| + |
| +// CHECK-LABEL: @add2( |
| +// CHECK-NEXT: entry: |
| +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 |
| +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 |
| +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 |
| +// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 |
| +// CHECK-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 |
| +// CHECK-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2 |
| +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 |
| +// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 |
| +// CHECK-NEXT: [[ADD:%.*]] = fadd half [[TMP0]], [[TMP1]] |
| +// CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 |
| +// CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ADD]], [[TMP2]] |
| +// CHECK-NEXT: ret half [[ADD1]] |
| +// |
| +_Float16 add2(_Float16 a, _Float16 b, _Float16 c) { |
| + return a + b + c; |
| +} |
| + |
| +// CHECK-LABEL: @div( |
| +// CHECK-NEXT: entry: |
| +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 |
| +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 |
| +// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 |
| +// CHECK-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 |
| +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 |
| +// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 |
| +// CHECK-NEXT: [[DIV:%.*]] = fdiv half [[TMP0]], [[TMP1]] |
| +// CHECK-NEXT: ret half [[DIV]] |
| +// |
| +_Float16 div(_Float16 a, _Float16 b) { |
| + return a / b; |
| +} |
| + |
| +// CHECK-LABEL: @mul( |
| +// CHECK-NEXT: entry: |
| +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 |
| +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 |
| +// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 |
| +// CHECK-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 |
| +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 |
| +// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 |
| +// CHECK-NEXT: [[MUL:%.*]] = fmul half [[TMP0]], [[TMP1]] |
| +// CHECK-NEXT: ret half [[MUL]] |
| +// |
| +_Float16 mul(_Float16 a, _Float16 b) { |
| + return a * b; |
| +} |
| + |
| +// CHECK-LABEL: @add_and_mul1( |
| +// CHECK-NEXT: entry: |
| +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 |
| +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 |
| +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 |
| +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca half, align 2 |
| +// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 |
| +// CHECK-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 |
| +// CHECK-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2 |
| +// CHECK-NEXT: store half [[D:%.*]], ptr [[D_ADDR]], align 2 |
| +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 |
| +// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 |
| +// CHECK-NEXT: [[MUL:%.*]] = fmul half [[TMP0]], [[TMP1]] |
| +// CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 |
| +// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2 |
| +// CHECK-NEXT: [[MUL1:%.*]] = fmul half [[TMP2]], [[TMP3]] |
| +// CHECK-NEXT: [[ADD:%.*]] = fadd half [[MUL]], [[MUL1]] |
| +// CHECK-NEXT: ret half [[ADD]] |
| +// |
| +_Float16 add_and_mul1(_Float16 a, _Float16 b, _Float16 c, _Float16 d) { |
| + return a * b + c * d; |
| +} |
| + |
| +// CHECK-LABEL: @add_and_mul2( |
| +// CHECK-NEXT: entry: |
| +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 |
| +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 |
| +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 |
| +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca half, align 2 |
| +// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 |
| +// CHECK-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 |
| +// CHECK-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2 |
| +// CHECK-NEXT: store half [[D:%.*]], ptr [[D_ADDR]], align 2 |
| +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 |
| +// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 |
| +// CHECK-NEXT: [[MUL:%.*]] = fmul half 0xH4600, [[TMP1]] |
| +// CHECK-NEXT: [[SUB:%.*]] = fsub half [[TMP0]], [[MUL]] |
| +// CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 |
| +// CHECK-NEXT: [[ADD:%.*]] = fadd half [[SUB]], [[TMP2]] |
| +// CHECK-NEXT: ret half [[ADD]] |
| +// |
| +_Float16 add_and_mul2(_Float16 a, _Float16 b, _Float16 c, _Float16 d) { |
| + return (a - 6 * b) + c; |
| +} |
| diff --git a/clang/test/CodeGen/X86/avx512fp16-complex.c b/clang/test/CodeGen/X86/Float16-complex.c |
| similarity index 96% |
| rename from clang/test/CodeGen/X86/avx512fp16-complex.c |
| rename to clang/test/CodeGen/X86/Float16-complex.c |
| index 8a6b50eb0056..ebb290c976e7 100644 |
| --- a/clang/test/CodeGen/X86/avx512fp16-complex.c |
| +++ b/clang/test/CodeGen/X86/Float16-complex.c |
| @@ -1,4 +1,5 @@ |
| // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -target-feature +avx512fp16 -o - | FileCheck %s --check-prefix=X86 |
| +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefix=X86 |
| |
| _Float16 _Complex add_half_rr(_Float16 a, _Float16 b) { |
| // X86-LABEL: @add_half_rr( |
| diff --git a/clang/test/Sema/Float16.c b/clang/test/Sema/Float16.c |
| index f0b94666f74d..26c604fed27a 100644 |
| --- a/clang/test/Sema/Float16.c |
| +++ b/clang/test/Sema/Float16.c |
| @@ -1,5 +1,6 @@ |
| -// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s |
| -// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc -target-feature +avx512fp16 %s -DHAVE |
| +// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc %s |
| +// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc -target-feature +sse2 %s -DHAVE |
| +// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -DHAVE |
| // RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE |
| // RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE |
| // RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE |
| diff --git a/clang/test/Sema/conversion-target-dep.c b/clang/test/Sema/conversion-target-dep.c |
| index 958a4d8b1f07..1b22cb820934 100644 |
| --- a/clang/test/Sema/conversion-target-dep.c |
| +++ b/clang/test/Sema/conversion-target-dep.c |
| @@ -6,7 +6,7 @@ |
| |
| long double ld; |
| double d; |
| -_Float16 f16; // x86-error {{_Float16 is not supported on this target}} |
| +_Float16 f16; |
| |
| int main(void) { |
| ld = d; // x86-warning {{implicit conversion increases floating-point precision: 'double' to 'long double'}} |
| diff --git a/clang/test/SemaCXX/Float16.cpp b/clang/test/SemaCXX/Float16.cpp |
| index f27c3839854e..61b02a50687b 100644 |
| --- a/clang/test/SemaCXX/Float16.cpp |
| +++ b/clang/test/SemaCXX/Float16.cpp |
| @@ -1,4 +1,6 @@ |
| -// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s |
| +// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc %s |
| +// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc -target-feature +sse2 %s -DHAVE |
| +// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -DHAVE |
| // RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE |
| // RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE |
| // RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE |
| diff --git a/compiler-rt/test/builtins/CMakeLists.txt b/compiler-rt/test/builtins/CMakeLists.txt |
| index d56ffc69763b..1579e223e875 100644 |
| --- a/compiler-rt/test/builtins/CMakeLists.txt |
| +++ b/compiler-rt/test/builtins/CMakeLists.txt |
| @@ -44,9 +44,17 @@ foreach(arch ${BUILTIN_TEST_ARCH}) |
| string(REPLACE ";" " " BUILTINS_TEST_TARGET_CFLAGS "${BUILTINS_TEST_TARGET_CFLAGS}") |
| endif() |
| |
| - if (${arch} MATCHES "arm|aarch64|arm64" AND COMPILER_RT_HAS_FLOAT16) |
| - list(APPEND BUILTINS_TEST_TARGET_CFLAGS -DCOMPILER_RT_HAS_FLOAT16) |
| - string(REPLACE ";" " " BUILTINS_TEST_TARGET_CFLAGS "${BUILTINS_TEST_TARGET_CFLAGS}") |
| + if(APPLE) |
| + # TODO: Support the new X86 _Float16 ABI on Apple platforms; until then only ARM targets get COMPILER_RT_HAS_FLOAT16 there. |
| + if (${arch} MATCHES "arm|aarch64|arm64" AND COMPILER_RT_HAS_FLOAT16) |
| + list(APPEND BUILTINS_TEST_TARGET_CFLAGS -DCOMPILER_RT_HAS_FLOAT16) |
| + string(REPLACE ";" " " BUILTINS_TEST_TARGET_CFLAGS "${BUILTINS_TEST_TARGET_CFLAGS}") |
| + endif() |
| + else() |
| + if (${arch} MATCHES "arm|aarch64|arm64|i?86|x86_64|AMD64" AND COMPILER_RT_HAS_FLOAT16) |
| + list(APPEND BUILTINS_TEST_TARGET_CFLAGS -DCOMPILER_RT_HAS_FLOAT16) |
| + string(REPLACE ";" " " BUILTINS_TEST_TARGET_CFLAGS "${BUILTINS_TEST_TARGET_CFLAGS}") |
| + endif() |
| endif() |
| |
| if(COMPILER_RT_ENABLE_CET) |