| From a76620143c54e0b40c7538f4ffa38f4c9db8a009 Mon Sep 17 00:00:00 2001 |
| From: Oliver Stannard <oliver.stannard@linaro.org> |
| Date: Wed, 9 Feb 2022 17:43:42 +0000 |
| Subject: [PATCH] [ARM] Patterns for vector conversion between half and float |
| |
| These patterns were omitted because clang only allows converting between |
| these types using intrinsics, but other front-ends or optimisation |
| passes may want to use them. |
| |
| Differential revision: https://reviews.llvm.org/D119354 |
| --- |
| llvm/lib/Target/ARM/ARMInstrNEON.td | 3 ++ |
| llvm/test/CodeGen/ARM/fp16-vector-cvt.ll | 59 ++++++++++++++++++++++++ |
| 2 files changed, 62 insertions(+) |
| create mode 100644 llvm/test/CodeGen/ARM/fp16-vector-cvt.ll |
| |
| diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td |
| index 357aa6d062e9..cdad8e106de6 100644 |
| --- a/llvm/lib/Target/ARM/ARMInstrNEON.td |
| +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td |
| @@ -6946,6 +6946,9 @@ def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0, |
| v4f32, v4i16, int_arm_neon_vcvthf2fp>, |
| Requires<[HasNEON, HasFP16]>; |
| |
| +def : Pat<(v4f16 (fpround (v4f32 QPR:$src))), (VCVTf2h QPR:$src)>; // f32->f16 |
| +def : Pat<(v4f32 (fpextend (v4f16 DPR:$src))), (VCVTh2f DPR:$src)>; // f16->f32 |
| + |
| // Vector Reverse. |
| |
| // VREV64 : Vector Reverse elements within 64-bit doublewords |
| diff --git a/llvm/test/CodeGen/ARM/fp16-vector-cvt.ll b/llvm/test/CodeGen/ARM/fp16-vector-cvt.ll |
| new file mode 100644 |
| index 000000000000..43cdbe85da49 |
| --- /dev/null |
| +++ b/llvm/test/CodeGen/ARM/fp16-vector-cvt.ll |
| @@ -0,0 +1,59 @@ |
| +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| +; RUN: llc -mtriple armv8a-none-none-eabihf -mattr=fullfp16 < %s | FileCheck %s |
| + |
| +define <4 x half> @fptrunc_vector_f32_f16(<4 x float> %a) { |
| +; CHECK-LABEL: fptrunc_vector_f32_f16: |
| +; CHECK: @ %bb.0: @ %bb |
| +; CHECK-NEXT: vcvt.f16.f32 d0, q0 |
| +; CHECK-NEXT: bx lr |
| +bb: |
| + %z = fptrunc <4 x float> %a to <4 x half> ; generic f32 -> f16 truncation; the checks above expect a single vcvt.f16.f32 |
| + ret <4 x half> %z |
| +} |
| + |
| +define <4 x half> @fptrunc_vector_f64_f16(<4 x double> %a) { |
| +; CHECK-LABEL: fptrunc_vector_f64_f16: |
| +; CHECK: @ %bb.0: @ %bb |
| +; CHECK-NEXT: vcvtb.f16.f64 s0, d0 |
| +; CHECK-NEXT: vcvtb.f16.f64 s8, d1 |
| +; CHECK-NEXT: vmov r1, s0 |
| +; CHECK-NEXT: vcvtb.f16.f64 s2, d2 |
| +; CHECK-NEXT: vmov r0, s8 |
| +; CHECK-NEXT: vmov.16 d0[0], r1 |
| +; CHECK-NEXT: vmov.16 d0[1], r0 |
| +; CHECK-NEXT: vmov r0, s2 |
| +; CHECK-NEXT: vcvtb.f16.f64 s2, d3 |
| +; CHECK-NEXT: vmov.16 d0[2], r0 |
| +; CHECK-NEXT: vmov r0, s2 |
| +; CHECK-NEXT: vmov.16 d0[3], r0 |
| +; CHECK-NEXT: bx lr |
| +bb: |
| + %z = fptrunc <4 x double> %a to <4 x half> ; f64 -> f16 truncation; the checks above expect four scalar vcvtb.f16.f64 with per-lane vmov.16 inserts |
| + ret <4 x half> %z |
| +} |
| + |
| +define <4 x float> @fpext_vector_f16_f32(<4 x half> %a) { |
| +; CHECK-LABEL: fpext_vector_f16_f32: |
| +; CHECK: @ %bb.0: @ %bb |
| +; CHECK-NEXT: vcvt.f32.f16 q0, d0 |
| +; CHECK-NEXT: bx lr |
| +bb: |
| + %z = fpext <4 x half> %a to <4 x float> ; generic f16 -> f32 extension; the checks above expect a single vcvt.f32.f16 |
| + ret <4 x float> %z |
| +} |
| + |
| +define <4 x double> @fpext_vector_f16_f64(<4 x half> %a) { |
| +; CHECK-LABEL: fpext_vector_f16_f64: |
| +; CHECK: @ %bb.0: @ %bb |
| +; CHECK-NEXT: vmovx.f16 s4, s0 |
| +; CHECK-NEXT: vmovx.f16 s2, s1 |
| +; CHECK-NEXT: vcvtb.f64.f16 d17, s4 |
| +; CHECK-NEXT: vcvtb.f64.f16 d3, s2 |
| +; CHECK-NEXT: vcvtb.f64.f16 d16, s0 |
| +; CHECK-NEXT: vcvtb.f64.f16 d2, s1 |
| +; CHECK-NEXT: vorr q0, q8, q8 |
| +; CHECK-NEXT: bx lr |
| +bb: |
| + %z = fpext <4 x half> %a to <4 x double> ; f16 -> f64 extension; the checks above expect two vmovx.f16 and four scalar vcvtb.f64.f16 |
| + ret <4 x double> %z |
| +} |
| -- |
| 2.36.0.rc0.470.gd361397f0d-goog |
| |