| commit 21f26470e9747c472d3c18654e676cbea8393635 |
| Author: Hans Wennborg <hans@chromium.org> |
| Date: Mon Nov 25 16:27:53 2019 +0100 |
| |
| Revert 3f91705ca54 "ARM-NEON: make type modifiers orthogonal and allow multiple modifiers." |
| |
| This broke the vcreate_u64 intrinsic. Example: |
| |
| $ cat /tmp/a.cc |
| #include <arm_neon.h> |
| |
| void g() { |
| auto v = vcreate_u64(0); |
| } |
| $ bin/clang -c /tmp/a.cc --target=arm-linux-androideabi16 -march=armv7-a |
| /tmp/a.cc:4:12: error: C-style cast from scalar 'int' to vector 'uint64x1_t' (vector of 1 'uint64_t' value) of different size |
| auto v = vcreate_u64(0); |
| ^~~~~~~~~~~~~~ |
| /work/llvm.monorepo/build.release/lib/clang/10.0.0/include/arm_neon.h:4144:11: note: expanded from macro 'vcreate_u64' |
| __ret = (uint64x1_t)(__p0); \ |
| ^~~~~~~~~~~~~~~~~~ |
| |
| Reverting until this can be investigated. |
| |
| > The modifier system used to mutate types on NEON intrinsic definitions had a |
| > separate letter for all kinds of transformations that might be needed, and we |
| > were quite quickly running out of letters to use. This patch converts to a much |
| > smaller set of orthogonal modifiers that can be applied together to achieve the |
| > desired effect. |
| > |
| > When merging with downstream it is likely to cause a conflict with any local |
| > modifications to the .td files. There is a new script in |
| > utils/convert_arm_neon.py that was used to convert all .td definitions and I |
| > would suggest running it on the last downstream version of those files before |
| > this commit rather than resolving conflicts manually. |
| |
| diff --git a/clang/include/clang/Basic/arm_fp16.td b/clang/include/clang/Basic/arm_fp16.td |
| index 79cd16233c1..bb9873efac8 100644 |
| --- a/clang/include/clang/Basic/arm_fp16.td |
| +++ b/clang/include/clang/Basic/arm_fp16.td |
| @@ -17,118 +17,118 @@ include "arm_neon_incl.td" |
| let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in { |
| |
| // Negate |
| - def VNEGSH : SInst<"vneg", "11", "Sh">; |
| + def VNEGSH : SInst<"vneg", "ss", "Sh">; |
| |
| // Reciprocal/Sqrt |
| - def SCALAR_FRECPSH : IInst<"vrecps", "111", "Sh">; |
| - def FSQRTSH : SInst<"vsqrt", "11", "Sh">; |
| - def SCALAR_FRSQRTSH : IInst<"vrsqrts", "111", "Sh">; |
| + def SCALAR_FRECPSH : IInst<"vrecps", "sss", "Sh">; |
| + def FSQRTSH : SInst<"vsqrt", "ss", "Sh">; |
| + def SCALAR_FRSQRTSH : IInst<"vrsqrts", "sss", "Sh">; |
| |
| // Reciprocal Estimate |
| - def SCALAR_FRECPEH : IInst<"vrecpe", "11", "Sh">; |
| + def SCALAR_FRECPEH : IInst<"vrecpe", "ss", "Sh">; |
| |
| // Reciprocal Exponent |
| - def SCALAR_FRECPXH : IInst<"vrecpx", "11", "Sh">; |
| + def SCALAR_FRECPXH : IInst<"vrecpx", "ss", "Sh">; |
| |
| // Reciprocal Square Root Estimate |
| - def SCALAR_FRSQRTEH : IInst<"vrsqrte", "11", "Sh">; |
| + def SCALAR_FRSQRTEH : IInst<"vrsqrte", "ss", "Sh">; |
| |
| // Rounding |
| - def FRINTZ_S64H : SInst<"vrnd", "11", "Sh">; |
| - def FRINTA_S64H : SInst<"vrnda", "11", "Sh">; |
| - def FRINTI_S64H : SInst<"vrndi", "11", "Sh">; |
| - def FRINTM_S64H : SInst<"vrndm", "11", "Sh">; |
| - def FRINTN_S64H : SInst<"vrndn", "11", "Sh">; |
| - def FRINTP_S64H : SInst<"vrndp", "11", "Sh">; |
| - def FRINTX_S64H : SInst<"vrndx", "11", "Sh">; |
| + def FRINTZ_S64H : SInst<"vrnd", "ss", "Sh">; |
| + def FRINTA_S64H : SInst<"vrnda", "ss", "Sh">; |
| + def FRINTI_S64H : SInst<"vrndi", "ss", "Sh">; |
| + def FRINTM_S64H : SInst<"vrndm", "ss", "Sh">; |
| + def FRINTN_S64H : SInst<"vrndn", "ss", "Sh">; |
| + def FRINTP_S64H : SInst<"vrndp", "ss", "Sh">; |
| + def FRINTX_S64H : SInst<"vrndx", "ss", "Sh">; |
| |
| // Conversion |
| - def SCALAR_SCVTFSH : SInst<"vcvth_f16", "(1F)(1!)", "sUs">; |
| - def SCALAR_SCVTFSH1 : SInst<"vcvth_f16", "(1F<)(1!)", "iUi">; |
| - def SCALAR_SCVTFSH2 : SInst<"vcvth_f16", "(1F<<)(1!)", "lUl">; |
| - def SCALAR_FCVTZSH : SInst<"vcvt_s16", "(1S)1", "Sh">; |
| - def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "(1S>)1", "Sh">; |
| - def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "(1S>>)1", "Sh">; |
| - def SCALAR_FCVTZUH : SInst<"vcvt_u16", "(1U)1", "Sh">; |
| - def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "(1U>)1", "Sh">; |
| - def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "(1U>>)1", "Sh">; |
| - def SCALAR_FCVTASH : SInst<"vcvta_s16", "(1S)1", "Sh">; |
| - def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "(1S>)1", "Sh">; |
| - def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "(1S>>)1", "Sh">; |
| - def SCALAR_FCVTAUH : SInst<"vcvta_u16", "(1U)1", "Sh">; |
| - def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "(1U>)1", "Sh">; |
| - def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "(1U>>)1", "Sh">; |
| - def SCALAR_FCVTMSH : SInst<"vcvtm_s16", "(1S)1", "Sh">; |
| - def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "(1S>)1", "Sh">; |
| - def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "(1S>>)1", "Sh">; |
| - def SCALAR_FCVTMUH : SInst<"vcvtm_u16", "(1U)1", "Sh">; |
| - def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "(1U>)1", "Sh">; |
| - def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "(1U>>)1", "Sh">; |
| - def SCALAR_FCVTNSH : SInst<"vcvtn_s16", "(1S)1", "Sh">; |
| - def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "(1S>)1", "Sh">; |
| - def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "(1S>>)1", "Sh">; |
| - def SCALAR_FCVTNUH : SInst<"vcvtn_u16", "(1U)1", "Sh">; |
| - def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "(1U>)1", "Sh">; |
| - def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "(1U>>)1", "Sh">; |
| - def SCALAR_FCVTPSH : SInst<"vcvtp_s16", "(1S)1", "Sh">; |
| - def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "(1S>)1", "Sh">; |
| - def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "(1S>>)1", "Sh">; |
| - def SCALAR_FCVTPUH : SInst<"vcvtp_u16", "(1U)1", "Sh">; |
| - def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "(1U>)1", "Sh">; |
| - def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "(1U>>)1", "Sh">; |
| + def SCALAR_SCVTFSH : SInst<"vcvth_f16", "Ys", "sUs">; |
| + def SCALAR_SCVTFSH1 : SInst<"vcvth_f16", "Ys", "iUi">; |
| + def SCALAR_SCVTFSH2 : SInst<"vcvth_f16", "Ys", "lUl">; |
| + def SCALAR_FCVTZSH : SInst<"vcvt_s16", "$s", "Sh">; |
| + def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "Is", "Sh">; |
| + def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "Ls", "Sh">; |
| + def SCALAR_FCVTZUH : SInst<"vcvt_u16", "bs", "Sh">; |
| + def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "Us", "Sh">; |
| + def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "Os", "Sh">; |
| + def SCALAR_FCVTASH : SInst<"vcvta_s16", "$s", "Sh">; |
| + def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "Is", "Sh">; |
| + def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "Ls", "Sh">; |
| + def SCALAR_FCVTAUH : SInst<"vcvta_u16", "bs", "Sh">; |
| + def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "Us", "Sh">; |
| + def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "Os", "Sh">; |
| + def SCALAR_FCVTMSH : SInst<"vcvtm_s16", "$s", "Sh">; |
| + def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "Is", "Sh">; |
| + def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "Ls", "Sh">; |
| + def SCALAR_FCVTMUH : SInst<"vcvtm_u16", "bs", "Sh">; |
| + def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "Us", "Sh">; |
| + def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "Os", "Sh">; |
| + def SCALAR_FCVTNSH : SInst<"vcvtn_s16", "$s", "Sh">; |
| + def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "Is", "Sh">; |
| + def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "Ls", "Sh">; |
| + def SCALAR_FCVTNUH : SInst<"vcvtn_u16", "bs", "Sh">; |
| + def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "Us", "Sh">; |
| + def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "Os", "Sh">; |
| + def SCALAR_FCVTPSH : SInst<"vcvtp_s16", "$s", "Sh">; |
| + def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "Is", "Sh">; |
| + def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "Ls", "Sh">; |
| + def SCALAR_FCVTPUH : SInst<"vcvtp_u16", "bs", "Sh">; |
| + def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "Us", "Sh">; |
| + def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "Os", "Sh">; |
| let isVCVT_N = 1 in { |
| - def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "(1F)(1!)I", "sUs">; |
| - def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "(1F<)(1!)I", "iUi">; |
| - def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "(1F<<)(1!)I", "lUl">; |
| - def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "(1S)1I", "Sh">; |
| - def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "(1S>)1I", "Sh">; |
| - def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "(1S>>)1I", "Sh">; |
| - def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "(1U)1I", "Sh">; |
| - def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "(1U>)1I", "Sh">; |
| - def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "(1U>>)1I", "Sh">; |
| + def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "sUs">; |
| + def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "Ysi", "iUi">; |
| + def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "Ysi", "lUl">; |
| + def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "$si", "Sh">; |
| + def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">; |
| + def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "Lsi", "Sh">; |
| + def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "bsi", "Sh">; |
| + def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">; |
| + def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "Osi", "Sh">; |
| } |
| // Comparison |
| - def SCALAR_CMEQRH : SInst<"vceq", "(1U)11", "Sh">; |
| - def SCALAR_CMEQZH : SInst<"vceqz", "(1U)1", "Sh">; |
| - def SCALAR_CMGERH : SInst<"vcge", "(1U)11", "Sh">; |
| - def SCALAR_CMGEZH : SInst<"vcgez", "(1U)1", "Sh">; |
| - def SCALAR_CMGTRH : SInst<"vcgt", "(1U)11", "Sh">; |
| - def SCALAR_CMGTZH : SInst<"vcgtz", "(1U)1", "Sh">; |
| - def SCALAR_CMLERH : SInst<"vcle", "(1U)11", "Sh">; |
| - def SCALAR_CMLEZH : SInst<"vclez", "(1U)1", "Sh">; |
| - def SCALAR_CMLTH : SInst<"vclt", "(1U)11", "Sh">; |
| - def SCALAR_CMLTZH : SInst<"vcltz", "(1U)1", "Sh">; |
| + def SCALAR_CMEQRH : SInst<"vceq", "bss", "Sh">; |
| + def SCALAR_CMEQZH : SInst<"vceqz", "bs", "Sh">; |
| + def SCALAR_CMGERH : SInst<"vcge", "bss", "Sh">; |
| + def SCALAR_CMGEZH : SInst<"vcgez", "bs", "Sh">; |
| + def SCALAR_CMGTRH : SInst<"vcgt", "bss", "Sh">; |
| + def SCALAR_CMGTZH : SInst<"vcgtz", "bs", "Sh">; |
| + def SCALAR_CMLERH : SInst<"vcle", "bss", "Sh">; |
| + def SCALAR_CMLEZH : SInst<"vclez", "bs", "Sh">; |
| + def SCALAR_CMLTH : SInst<"vclt", "bss", "Sh">; |
| + def SCALAR_CMLTZH : SInst<"vcltz", "bs", "Sh">; |
| |
| // Absolute Compare Mask Greater Than Or Equal |
| - def SCALAR_FACGEH : IInst<"vcage", "(1U)11", "Sh">; |
| - def SCALAR_FACLEH : IInst<"vcale", "(1U)11", "Sh">; |
| + def SCALAR_FACGEH : IInst<"vcage", "bss", "Sh">; |
| + def SCALAR_FACLEH : IInst<"vcale", "bss", "Sh">; |
| |
| // Absolute Compare Mask Greater Than |
| - def SCALAR_FACGT : IInst<"vcagt", "(1U)11", "Sh">; |
| - def SCALAR_FACLT : IInst<"vcalt", "(1U)11", "Sh">; |
| + def SCALAR_FACGT : IInst<"vcagt", "bss", "Sh">; |
| + def SCALAR_FACLT : IInst<"vcalt", "bss", "Sh">; |
| |
| // Scalar Absolute Value |
| - def SCALAR_ABSH : SInst<"vabs", "11", "Sh">; |
| + def SCALAR_ABSH : SInst<"vabs", "ss", "Sh">; |
| |
| // Scalar Absolute Difference |
| - def SCALAR_ABDH: IInst<"vabd", "111", "Sh">; |
| + def SCALAR_ABDH: IInst<"vabd", "sss", "Sh">; |
| |
| // Add/Sub |
| - def VADDSH : SInst<"vadd", "111", "Sh">; |
| - def VSUBHS : SInst<"vsub", "111", "Sh">; |
| + def VADDSH : SInst<"vadd", "sss", "Sh">; |
| + def VSUBHS : SInst<"vsub", "sss", "Sh">; |
| |
| // Max/Min |
| - def VMAXHS : SInst<"vmax", "111", "Sh">; |
| - def VMINHS : SInst<"vmin", "111", "Sh">; |
| - def FMAXNMHS : SInst<"vmaxnm", "111", "Sh">; |
| - def FMINNMHS : SInst<"vminnm", "111", "Sh">; |
| + def VMAXHS : SInst<"vmax", "sss", "Sh">; |
| + def VMINHS : SInst<"vmin", "sss", "Sh">; |
| + def FMAXNMHS : SInst<"vmaxnm", "sss", "Sh">; |
| + def FMINNMHS : SInst<"vminnm", "sss", "Sh">; |
| |
| // Multiplication/Division |
| - def VMULHS : SInst<"vmul", "111", "Sh">; |
| - def MULXHS : SInst<"vmulx", "111", "Sh">; |
| - def FDIVHS : SInst<"vdiv", "111", "Sh">; |
| + def VMULHS : SInst<"vmul", "sss", "Sh">; |
| + def MULXHS : SInst<"vmulx", "sss", "Sh">; |
| + def FDIVHS : SInst<"vdiv", "sss", "Sh">; |
| |
| // Vector fused multiply-add operations |
| - def VFMAHS : SInst<"vfma", "1111", "Sh">; |
| - def VFMSHS : SInst<"vfms", "1111", "Sh">; |
| + def VFMAHS : SInst<"vfma", "ssss", "Sh">; |
| + def VFMSHS : SInst<"vfms", "ssss", "Sh">; |
| } |
| diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td |
| index 25db142624e..127c5af97ce 100644 |
| --- a/clang/include/clang/Basic/arm_neon.td |
| +++ b/clang/include/clang/Basic/arm_neon.td |
| @@ -226,240 +226,240 @@ def OP_FMLSL_LN_Hi : Op<(call "vfmlsl_high", $p0, $p1, |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.1 Addition |
| -def VADD : IOpInst<"vadd", "...", |
| +def VADD : IOpInst<"vadd", "ddd", |
| "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUl", OP_ADD>; |
| -def VADDL : SOpInst<"vaddl", "(>Q)..", "csiUcUsUi", OP_ADDL>; |
| -def VADDW : SOpInst<"vaddw", "(>Q)(>Q).", "csiUcUsUi", OP_ADDW>; |
| -def VHADD : SInst<"vhadd", "...", "csiUcUsUiQcQsQiQUcQUsQUi">; |
| -def VRHADD : SInst<"vrhadd", "...", "csiUcUsUiQcQsQiQUcQUsQUi">; |
| -def VQADD : SInst<"vqadd", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| -def VADDHN : IInst<"vaddhn", "<QQ", "silUsUiUl">; |
| -def VRADDHN : IInst<"vraddhn", "<QQ", "silUsUiUl">; |
| +def VADDL : SOpInst<"vaddl", "wdd", "csiUcUsUi", OP_ADDL>; |
| +def VADDW : SOpInst<"vaddw", "wwd", "csiUcUsUi", OP_ADDW>; |
| +def VHADD : SInst<"vhadd", "ddd", "csiUcUsUiQcQsQiQUcQUsQUi">; |
| +def VRHADD : SInst<"vrhadd", "ddd", "csiUcUsUiQcQsQiQUcQUsQUi">; |
| +def VQADD : SInst<"vqadd", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| +def VADDHN : IInst<"vaddhn", "hkk", "silUsUiUl">; |
| +def VRADDHN : IInst<"vraddhn", "hkk", "silUsUiUl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.2 Multiplication |
| -def VMUL : IOpInst<"vmul", "...", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MUL>; |
| -def VMULP : SInst<"vmul", "...", "PcQPc">; |
| -def VMLA : IOpInst<"vmla", "....", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLA>; |
| -def VMLAL : SOpInst<"vmlal", "(>Q)(>Q)..", "csiUcUsUi", OP_MLAL>; |
| -def VMLS : IOpInst<"vmls", "....", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLS>; |
| -def VMLSL : SOpInst<"vmlsl", "(>Q)(>Q)..", "csiUcUsUi", OP_MLSL>; |
| -def VQDMULH : SInst<"vqdmulh", "...", "siQsQi">; |
| -def VQRDMULH : SInst<"vqrdmulh", "...", "siQsQi">; |
| +def VMUL : IOpInst<"vmul", "ddd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MUL>; |
| +def VMULP : SInst<"vmul", "ddd", "PcQPc">; |
| +def VMLA : IOpInst<"vmla", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLA>; |
| +def VMLAL : SOpInst<"vmlal", "wwdd", "csiUcUsUi", OP_MLAL>; |
| +def VMLS : IOpInst<"vmls", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLS>; |
| +def VMLSL : SOpInst<"vmlsl", "wwdd", "csiUcUsUi", OP_MLSL>; |
| +def VQDMULH : SInst<"vqdmulh", "ddd", "siQsQi">; |
| +def VQRDMULH : SInst<"vqrdmulh", "ddd", "siQsQi">; |
| |
| let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" in { |
| -def VQRDMLAH : SOpInst<"vqrdmlah", "....", "siQsQi", OP_QRDMLAH>; |
| -def VQRDMLSH : SOpInst<"vqrdmlsh", "....", "siQsQi", OP_QRDMLSH>; |
| +def VQRDMLAH : SOpInst<"vqrdmlah", "dddd", "siQsQi", OP_QRDMLAH>; |
| +def VQRDMLSH : SOpInst<"vqrdmlsh", "dddd", "siQsQi", OP_QRDMLSH>; |
| } |
| |
| -def VQDMLAL : SInst<"vqdmlal", "(>Q)(>Q)..", "si">; |
| -def VQDMLSL : SInst<"vqdmlsl", "(>Q)(>Q)..", "si">; |
| -def VMULL : SInst<"vmull", "(>Q)..", "csiUcUsUiPc">; |
| -def VQDMULL : SInst<"vqdmull", "(>Q)..", "si">; |
| +def VQDMLAL : SInst<"vqdmlal", "wwdd", "si">; |
| +def VQDMLSL : SInst<"vqdmlsl", "wwdd", "si">; |
| +def VMULL : SInst<"vmull", "wdd", "csiUcUsUiPc">; |
| +def VQDMULL : SInst<"vqdmull", "wdd", "si">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.3 Subtraction |
| -def VSUB : IOpInst<"vsub", "...", |
| +def VSUB : IOpInst<"vsub", "ddd", |
| "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUl", OP_SUB>; |
| -def VSUBL : SOpInst<"vsubl", "(>Q)..", "csiUcUsUi", OP_SUBL>; |
| -def VSUBW : SOpInst<"vsubw", "(>Q)(>Q).", "csiUcUsUi", OP_SUBW>; |
| -def VQSUB : SInst<"vqsub", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| -def VHSUB : SInst<"vhsub", "...", "csiUcUsUiQcQsQiQUcQUsQUi">; |
| -def VSUBHN : IInst<"vsubhn", "<QQ", "silUsUiUl">; |
| -def VRSUBHN : IInst<"vrsubhn", "<QQ", "silUsUiUl">; |
| +def VSUBL : SOpInst<"vsubl", "wdd", "csiUcUsUi", OP_SUBL>; |
| +def VSUBW : SOpInst<"vsubw", "wwd", "csiUcUsUi", OP_SUBW>; |
| +def VQSUB : SInst<"vqsub", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| +def VHSUB : SInst<"vhsub", "ddd", "csiUcUsUiQcQsQiQUcQUsQUi">; |
| +def VSUBHN : IInst<"vsubhn", "hkk", "silUsUiUl">; |
| +def VRSUBHN : IInst<"vrsubhn", "hkk", "silUsUiUl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.4 Comparison |
| -def VCEQ : IOpInst<"vceq", "U..", "csifUcUsUiPcQcQsQiQfQUcQUsQUiQPc", OP_EQ>; |
| -def VCGE : SOpInst<"vcge", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GE>; |
| +def VCEQ : IOpInst<"vceq", "udd", "csifUcUsUiPcQcQsQiQfQUcQUsQUiQPc", OP_EQ>; |
| +def VCGE : SOpInst<"vcge", "udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GE>; |
| let InstName = "vcge" in |
| -def VCLE : SOpInst<"vcle", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LE>; |
| -def VCGT : SOpInst<"vcgt", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GT>; |
| +def VCLE : SOpInst<"vcle", "udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LE>; |
| +def VCGT : SOpInst<"vcgt", "udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GT>; |
| let InstName = "vcgt" in |
| -def VCLT : SOpInst<"vclt", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LT>; |
| +def VCLT : SOpInst<"vclt", "udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LT>; |
| let InstName = "vacge" in { |
| -def VCAGE : IInst<"vcage", "U..", "fQf">; |
| -def VCALE : IInst<"vcale", "U..", "fQf">; |
| +def VCAGE : IInst<"vcage", "udd", "fQf">; |
| +def VCALE : IInst<"vcale", "udd", "fQf">; |
| } |
| let InstName = "vacgt" in { |
| -def VCAGT : IInst<"vcagt", "U..", "fQf">; |
| -def VCALT : IInst<"vcalt", "U..", "fQf">; |
| +def VCAGT : IInst<"vcagt", "udd", "fQf">; |
| +def VCALT : IInst<"vcalt", "udd", "fQf">; |
| } |
| -def VTST : WInst<"vtst", "U..", "csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPs">; |
| +def VTST : WInst<"vtst", "udd", "csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPs">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.5 Absolute Difference |
| -def VABD : SInst<"vabd", "...", "csiUcUsUifQcQsQiQUcQUsQUiQf">; |
| -def VABDL : SOpInst<"vabdl", "(>Q)..", "csiUcUsUi", OP_ABDL>; |
| -def VABA : SOpInst<"vaba", "....", "csiUcUsUiQcQsQiQUcQUsQUi", OP_ABA>; |
| -def VABAL : SOpInst<"vabal", "(>Q)(>Q)..", "csiUcUsUi", OP_ABAL>; |
| +def VABD : SInst<"vabd", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQf">; |
| +def VABDL : SOpInst<"vabdl", "wdd", "csiUcUsUi", OP_ABDL>; |
| +def VABA : SOpInst<"vaba", "dddd", "csiUcUsUiQcQsQiQUcQUsQUi", OP_ABA>; |
| +def VABAL : SOpInst<"vabal", "wwdd", "csiUcUsUi", OP_ABAL>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.6 Max/Min |
| -def VMAX : SInst<"vmax", "...", "csiUcUsUifQcQsQiQUcQUsQUiQf">; |
| -def VMIN : SInst<"vmin", "...", "csiUcUsUifQcQsQiQUcQUsQUiQf">; |
| +def VMAX : SInst<"vmax", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQf">; |
| +def VMIN : SInst<"vmin", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQf">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.7 Pairwise Addition |
| -def VPADD : IInst<"vpadd", "...", "csiUcUsUif">; |
| -def VPADDL : SInst<"vpaddl", ">.", "csiUcUsUiQcQsQiQUcQUsQUi">; |
| -def VPADAL : SInst<"vpadal", ">>.", "csiUcUsUiQcQsQiQUcQUsQUi">; |
| +def VPADD : IInst<"vpadd", "ddd", "csiUcUsUif">; |
| +def VPADDL : SInst<"vpaddl", "nd", "csiUcUsUiQcQsQiQUcQUsQUi">; |
| +def VPADAL : SInst<"vpadal", "nnd", "csiUcUsUiQcQsQiQUcQUsQUi">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.8-9 Folding Max/Min |
| -def VPMAX : SInst<"vpmax", "...", "csiUcUsUif">; |
| -def VPMIN : SInst<"vpmin", "...", "csiUcUsUif">; |
| +def VPMAX : SInst<"vpmax", "ddd", "csiUcUsUif">; |
| +def VPMIN : SInst<"vpmin", "ddd", "csiUcUsUif">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.10 Reciprocal/Sqrt |
| -def VRECPS : IInst<"vrecps", "...", "fQf">; |
| -def VRSQRTS : IInst<"vrsqrts", "...", "fQf">; |
| +def VRECPS : IInst<"vrecps", "ddd", "fQf">; |
| +def VRSQRTS : IInst<"vrsqrts", "ddd", "fQf">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.11 Shifts by signed variable |
| -def VSHL : SInst<"vshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| -def VQSHL : SInst<"vqshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| -def VRSHL : SInst<"vrshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| -def VQRSHL : SInst<"vqrshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| +def VSHL : SInst<"vshl", "ddx", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| +def VQSHL : SInst<"vqshl", "ddx", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| +def VRSHL : SInst<"vrshl", "ddx", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| +def VQRSHL : SInst<"vqrshl", "ddx", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.12 Shifts by constant |
| let isShift = 1 in { |
| -def VSHR_N : SInst<"vshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| -def VSHL_N : IInst<"vshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| -def VRSHR_N : SInst<"vrshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| -def VSRA_N : SInst<"vsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| -def VRSRA_N : SInst<"vrsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| -def VQSHL_N : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| -def VQSHLU_N : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl">; |
| -def VSHRN_N : IInst<"vshrn_n", "<QI", "silUsUiUl">; |
| -def VQSHRUN_N : SInst<"vqshrun_n", "(<U)QI", "sil">; |
| -def VQRSHRUN_N : SInst<"vqrshrun_n", "(<U)QI", "sil">; |
| -def VQSHRN_N : SInst<"vqshrn_n", "<QI", "silUsUiUl">; |
| -def VRSHRN_N : IInst<"vrshrn_n", "<QI", "silUsUiUl">; |
| -def VQRSHRN_N : SInst<"vqrshrn_n", "<QI", "silUsUiUl">; |
| -def VSHLL_N : SInst<"vshll_n", "(>Q).I", "csiUcUsUi">; |
| +def VSHR_N : SInst<"vshr_n", "ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| +def VSHL_N : IInst<"vshl_n", "ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| +def VRSHR_N : SInst<"vrshr_n", "ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| +def VSRA_N : SInst<"vsra_n", "dddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| +def VRSRA_N : SInst<"vrsra_n", "dddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| +def VQSHL_N : SInst<"vqshl_n", "ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; |
| +def VQSHLU_N : SInst<"vqshlu_n", "udi", "csilQcQsQiQl">; |
| +def VSHRN_N : IInst<"vshrn_n", "hki", "silUsUiUl">; |
| +def VQSHRUN_N : SInst<"vqshrun_n", "eki", "sil">; |
| +def VQRSHRUN_N : SInst<"vqrshrun_n", "eki", "sil">; |
| +def VQSHRN_N : SInst<"vqshrn_n", "hki", "silUsUiUl">; |
| +def VRSHRN_N : IInst<"vrshrn_n", "hki", "silUsUiUl">; |
| +def VQRSHRN_N : SInst<"vqrshrn_n", "hki", "silUsUiUl">; |
| +def VSHLL_N : SInst<"vshll_n", "wdi", "csiUcUsUi">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.13 Shifts with insert |
| -def VSRI_N : WInst<"vsri_n", "...I", |
| +def VSRI_N : WInst<"vsri_n", "dddi", |
| "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">; |
| -def VSLI_N : WInst<"vsli_n", "...I", |
| +def VSLI_N : WInst<"vsli_n", "dddi", |
| "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.14 Loads and stores of a single vector |
| -def VLD1 : WInst<"vld1", ".(c*!)", |
| +def VLD1 : WInst<"vld1", "dc", |
| "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; |
| -def VLD1_X2 : WInst<"vld1_x2", "2(c*!)", |
| +def VLD1_X2 : WInst<"vld1_x2", "2c", |
| "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; |
| -def VLD1_X3 : WInst<"vld1_x3", "3(c*!)", |
| +def VLD1_X3 : WInst<"vld1_x3", "3c", |
| "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; |
| -def VLD1_X4 : WInst<"vld1_x4", "4(c*!)", |
| +def VLD1_X4 : WInst<"vld1_x4", "4c", |
| "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; |
| -def VLD1_LANE : WInst<"vld1_lane", ".(c*!).I", |
| +def VLD1_LANE : WInst<"vld1_lane", "dcdi", |
| "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; |
| -def VLD1_DUP : WInst<"vld1_dup", ".(c*!)", |
| +def VLD1_DUP : WInst<"vld1_dup", "dc", |
| "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; |
| -def VST1 : WInst<"vst1", "v*(.!)", |
| +def VST1 : WInst<"vst1", "vpd", |
| "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; |
| -def VST1_X2 : WInst<"vst1_x2", "v*(2!)", |
| +def VST1_X2 : WInst<"vst1_x2", "vp2", |
| "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; |
| -def VST1_X3 : WInst<"vst1_x3", "v*(3!)", |
| +def VST1_X3 : WInst<"vst1_x3", "vp3", |
| "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; |
| -def VST1_X4 : WInst<"vst1_x4", "v*(4!)", |
| +def VST1_X4 : WInst<"vst1_x4", "vp4", |
| "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; |
| -def VST1_LANE : WInst<"vst1_lane", "v*(.!)I", |
| +def VST1_LANE : WInst<"vst1_lane", "vpdi", |
| "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; |
| let ArchGuard = "(__ARM_FP & 2)" in { |
| -def VLD1_F16 : WInst<"vld1", ".(c*!)", "hQh">; |
| -def VLD1_X2_F16 : WInst<"vld1_x2", "2(c*!)", "hQh">; |
| -def VLD1_X3_F16 : WInst<"vld1_x3", "3(c*!)", "hQh">; |
| -def VLD1_X4_F16 : WInst<"vld1_x4", "4(c*!)", "hQh">; |
| -def VLD1_LANE_F16 : WInst<"vld1_lane", ".(c*!).I", "hQh">; |
| -def VLD1_DUP_F16 : WInst<"vld1_dup", ".(c*!)", "hQh">; |
| -def VST1_F16 : WInst<"vst1", "v*(.!)", "hQh">; |
| -def VST1_X2_F16 : WInst<"vst1_x2", "v*(2!)", "hQh">; |
| -def VST1_X3_F16 : WInst<"vst1_x3", "v*(3!)", "hQh">; |
| -def VST1_X4_F16 : WInst<"vst1_x4", "v*(4!)", "hQh">; |
| -def VST1_LANE_F16 : WInst<"vst1_lane", "v*(.!)I", "hQh">; |
| +def VLD1_F16 : WInst<"vld1", "dc", "hQh">; |
| +def VLD1_X2_F16 : WInst<"vld1_x2", "2c", "hQh">; |
| +def VLD1_X3_F16 : WInst<"vld1_x3", "3c", "hQh">; |
| +def VLD1_X4_F16 : WInst<"vld1_x4", "4c", "hQh">; |
| +def VLD1_LANE_F16 : WInst<"vld1_lane", "dcdi", "hQh">; |
| +def VLD1_DUP_F16 : WInst<"vld1_dup", "dc", "hQh">; |
| +def VST1_F16 : WInst<"vst1", "vpd", "hQh">; |
| +def VST1_X2_F16 : WInst<"vst1_x2", "vp2", "hQh">; |
| +def VST1_X3_F16 : WInst<"vst1_x3", "vp3", "hQh">; |
| +def VST1_X4_F16 : WInst<"vst1_x4", "vp4", "hQh">; |
| +def VST1_LANE_F16 : WInst<"vst1_lane", "vpdi", "hQh">; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.15 Loads and stores of an N-element structure |
| -def VLD2 : WInst<"vld2", "2(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; |
| -def VLD3 : WInst<"vld3", "3(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; |
| -def VLD4 : WInst<"vld4", "4(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; |
| -def VLD2_DUP : WInst<"vld2_dup", "2(c*!)", |
| +def VLD2 : WInst<"vld2", "2c", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; |
| +def VLD3 : WInst<"vld3", "3c", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; |
| +def VLD4 : WInst<"vld4", "4c", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; |
| +def VLD2_DUP : WInst<"vld2_dup", "2c", |
| "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">; |
| -def VLD3_DUP : WInst<"vld3_dup", "3(c*!)", |
| +def VLD3_DUP : WInst<"vld3_dup", "3c", |
| "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">; |
| -def VLD4_DUP : WInst<"vld4_dup", "4(c*!)", |
| +def VLD4_DUP : WInst<"vld4_dup", "4c", |
| "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">; |
| -def VLD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; |
| -def VLD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; |
| -def VLD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; |
| -def VST2 : WInst<"vst2", "v*(2!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; |
| -def VST3 : WInst<"vst3", "v*(3!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; |
| -def VST4 : WInst<"vst4", "v*(4!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; |
| -def VST2_LANE : WInst<"vst2_lane", "v*(2!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; |
| -def VST3_LANE : WInst<"vst3_lane", "v*(3!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; |
| -def VST4_LANE : WInst<"vst4_lane", "v*(4!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; |
| +def VLD2_LANE : WInst<"vld2_lane", "2c2i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; |
| +def VLD3_LANE : WInst<"vld3_lane", "3c3i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; |
| +def VLD4_LANE : WInst<"vld4_lane", "4c4i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; |
| +def VST2 : WInst<"vst2", "vp2", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; |
| +def VST3 : WInst<"vst3", "vp3", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; |
| +def VST4 : WInst<"vst4", "vp4", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; |
| +def VST2_LANE : WInst<"vst2_lane", "vp2i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; |
| +def VST3_LANE : WInst<"vst3_lane", "vp3i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; |
| +def VST4_LANE : WInst<"vst4_lane", "vp4i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; |
| let ArchGuard = "(__ARM_FP & 2)" in { |
| -def VLD2_F16 : WInst<"vld2", "2(c*!)", "hQh">; |
| -def VLD3_F16 : WInst<"vld3", "3(c*!)", "hQh">; |
| -def VLD4_F16 : WInst<"vld4", "4(c*!)", "hQh">; |
| -def VLD2_DUP_F16 : WInst<"vld2_dup", "2(c*!)", "hQh">; |
| -def VLD3_DUP_F16 : WInst<"vld3_dup", "3(c*!)", "hQh">; |
| -def VLD4_DUP_F16 : WInst<"vld4_dup", "4(c*!)", "hQh">; |
| -def VLD2_LANE_F16 : WInst<"vld2_lane", "2(c*!)2I", "hQh">; |
| -def VLD3_LANE_F16 : WInst<"vld3_lane", "3(c*!)3I", "hQh">; |
| -def VLD4_LANE_F16 : WInst<"vld4_lane", "4(c*!)4I", "hQh">; |
| -def VST2_F16 : WInst<"vst2", "v*(2!)", "hQh">; |
| -def VST3_F16 : WInst<"vst3", "v*(3!)", "hQh">; |
| -def VST4_F16 : WInst<"vst4", "v*(4!)", "hQh">; |
| -def VST2_LANE_F16 : WInst<"vst2_lane", "v*(2!)I", "hQh">; |
| -def VST3_LANE_F16 : WInst<"vst3_lane", "v*(3!)I", "hQh">; |
| -def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh">; |
| +def VLD2_F16 : WInst<"vld2", "2c", "hQh">; |
| +def VLD3_F16 : WInst<"vld3", "3c", "hQh">; |
| +def VLD4_F16 : WInst<"vld4", "4c", "hQh">; |
| +def VLD2_DUP_F16 : WInst<"vld2_dup", "2c", "hQh">; |
| +def VLD3_DUP_F16 : WInst<"vld3_dup", "3c", "hQh">; |
| +def VLD4_DUP_F16 : WInst<"vld4_dup", "4c", "hQh">; |
| +def VLD2_LANE_F16 : WInst<"vld2_lane", "2c2i", "hQh">; |
| +def VLD3_LANE_F16 : WInst<"vld3_lane", "3c3i", "hQh">; |
| +def VLD4_LANE_F16 : WInst<"vld4_lane", "4c4i", "hQh">; |
| +def VST2_F16 : WInst<"vst2", "vp2", "hQh">; |
| +def VST3_F16 : WInst<"vst3", "vp3", "hQh">; |
| +def VST4_F16 : WInst<"vst4", "vp4", "hQh">; |
| +def VST2_LANE_F16 : WInst<"vst2_lane", "vp2i", "hQh">; |
| +def VST3_LANE_F16 : WInst<"vst3_lane", "vp3i", "hQh">; |
| +def VST4_LANE_F16 : WInst<"vst4_lane", "vp4i", "hQh">; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.16 Extract lanes from a vector |
| let InstName = "vmov" in |
| -def VGET_LANE : IInst<"vget_lane", "1.I", |
| +def VGET_LANE : IInst<"vget_lane", "sdi", |
| "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.17 Set lanes within a vector |
| let InstName = "vmov" in |
| -def VSET_LANE : IInst<"vset_lane", ".1.I", |
| +def VSET_LANE : IInst<"vset_lane", "dsdi", |
| "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.18 Initialize a vector from bit pattern |
| -def VCREATE : NoTestOpInst<"vcreate", ".(IU>)", "csihfUcUsUiUlPcPsl", OP_CAST> { |
| +def VCREATE : NoTestOpInst<"vcreate", "dl", "csihfUcUsUiUlPcPsl", OP_CAST> { |
| let BigEndianSafe = 1; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.19 Set all lanes to same value |
| let InstName = "vmov" in { |
| -def VDUP_N : WOpInst<"vdup_n", ".1", |
| +def VDUP_N : WOpInst<"vdup_n", "ds", |
| "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl", |
| OP_DUP>; |
| -def VMOV_N : WOpInst<"vmov_n", ".1", |
| +def VMOV_N : WOpInst<"vmov_n", "ds", |
| "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl", |
| OP_DUP>; |
| } |
| let InstName = "" in |
| -def VDUP_LANE: WOpInst<"vdup_lane", ".qI", |
| +def VDUP_LANE: WOpInst<"vdup_lane", "dgi", |
| "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", |
| OP_DUP_LN>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.20 Combining vectors |
| -def VCOMBINE : NoTestOpInst<"vcombine", "Q..", "csilhfUcUsUiUlPcPs", OP_CONC>; |
| +def VCOMBINE : NoTestOpInst<"vcombine", "kdd", "csilhfUcUsUiUlPcPs", OP_CONC>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.21 Splitting vectors |
| @@ -468,127 +468,127 @@ def VCOMBINE : NoTestOpInst<"vcombine", "Q..", "csilhfUcUsUiUlPcPs", OP_CONC>; |
| // versions of these intrinsics in both AArch32 and AArch64 architectures. See |
| // D45668 for more details. |
| let InstName = "vmov" in { |
| -def VGET_HIGH : NoTestOpInst<"vget_high", ".Q", "csilhfUcUsUiUlPcPs", OP_HI>; |
| -def VGET_LOW : NoTestOpInst<"vget_low", ".Q", "csilhfUcUsUiUlPcPs", OP_LO>; |
| +def VGET_HIGH : NoTestOpInst<"vget_high", "dk", "csilhfUcUsUiUlPcPs", OP_HI>; |
| +def VGET_LOW : NoTestOpInst<"vget_low", "dk", "csilhfUcUsUiUlPcPs", OP_LO>; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.22 Converting vectors |
| |
| let ArchGuard = "(__ARM_FP & 2)" in { |
| - def VCVT_F16_F32 : SInst<"vcvt_f16_f32", "(<q)(.!)", "Hf">; |
| - def VCVT_F32_F16 : SInst<"vcvt_f32_f16", "(>Q)(.!)", "h">; |
| + def VCVT_F16_F32 : SInst<"vcvt_f16_f32", "md", "Hf">; |
| + def VCVT_F32_F16 : SInst<"vcvt_f32_f16", "wd", "h">; |
| } |
| |
| -def VCVT_S32 : SInst<"vcvt_s32", "S.", "fQf">; |
| -def VCVT_U32 : SInst<"vcvt_u32", "U.", "fQf">; |
| -def VCVT_F32 : SInst<"vcvt_f32", "F(.!)", "iUiQiQUi">; |
| +def VCVT_S32 : SInst<"vcvt_s32", "xd", "fQf">; |
| +def VCVT_U32 : SInst<"vcvt_u32", "ud", "fQf">; |
| +def VCVT_F32 : SInst<"vcvt_f32", "fd", "iUiQiQUi">; |
| let isVCVT_N = 1 in { |
| -def VCVT_N_S32 : SInst<"vcvt_n_s32", "S.I", "fQf">; |
| -def VCVT_N_U32 : SInst<"vcvt_n_u32", "U.I", "fQf">; |
| -def VCVT_N_F32 : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi">; |
| +def VCVT_N_S32 : SInst<"vcvt_n_s32", "xdi", "fQf">; |
| +def VCVT_N_U32 : SInst<"vcvt_n_u32", "udi", "fQf">; |
| +def VCVT_N_F32 : SInst<"vcvt_n_f32", "fdi", "iUiQiQUi">; |
| } |
| |
| -def VMOVN : IInst<"vmovn", "<Q", "silUsUiUl">; |
| -def VMOVL : SInst<"vmovl", "(>Q).", "csiUcUsUi">; |
| -def VQMOVN : SInst<"vqmovn", "<Q", "silUsUiUl">; |
| -def VQMOVUN : SInst<"vqmovun", "(<U)Q", "sil">; |
| +def VMOVN : IInst<"vmovn", "hk", "silUsUiUl">; |
| +def VMOVL : SInst<"vmovl", "wd", "csiUcUsUi">; |
| +def VQMOVN : SInst<"vqmovn", "hk", "silUsUiUl">; |
| +def VQMOVUN : SInst<"vqmovun", "ek", "sil">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.23-24 Table lookup, Extended table lookup |
| let InstName = "vtbl" in { |
| -def VTBL1 : WInst<"vtbl1", "..p", "UccPc">; |
| -def VTBL2 : WInst<"vtbl2", ".2p", "UccPc">; |
| -def VTBL3 : WInst<"vtbl3", ".3p", "UccPc">; |
| -def VTBL4 : WInst<"vtbl4", ".4p", "UccPc">; |
| +def VTBL1 : WInst<"vtbl1", "ddt", "UccPc">; |
| +def VTBL2 : WInst<"vtbl2", "d2t", "UccPc">; |
| +def VTBL3 : WInst<"vtbl3", "d3t", "UccPc">; |
| +def VTBL4 : WInst<"vtbl4", "d4t", "UccPc">; |
| } |
| let InstName = "vtbx" in { |
| -def VTBX1 : WInst<"vtbx1", "...p", "UccPc">; |
| -def VTBX2 : WInst<"vtbx2", "..2p", "UccPc">; |
| -def VTBX3 : WInst<"vtbx3", "..3p", "UccPc">; |
| -def VTBX4 : WInst<"vtbx4", "..4p", "UccPc">; |
| +def VTBX1 : WInst<"vtbx1", "dddt", "UccPc">; |
| +def VTBX2 : WInst<"vtbx2", "dd2t", "UccPc">; |
| +def VTBX3 : WInst<"vtbx3", "dd3t", "UccPc">; |
| +def VTBX4 : WInst<"vtbx4", "dd4t", "UccPc">; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.25 Operations with a scalar value |
| -def VMLA_LANE : IOpInst<"vmla_lane", "...qI", |
| +def VMLA_LANE : IOpInst<"vmla_lane", "dddgi", |
| "siUsUifQsQiQUsQUiQf", OP_MLA_LN>; |
| -def VMLAL_LANE : SOpInst<"vmlal_lane", "(>Q)(>Q)..I", "siUsUi", OP_MLAL_LN>; |
| -def VQDMLAL_LANE : SOpInst<"vqdmlal_lane", "(>Q)(>Q)..I", "si", OP_QDMLAL_LN>; |
| -def VMLS_LANE : IOpInst<"vmls_lane", "...qI", |
| +def VMLAL_LANE : SOpInst<"vmlal_lane", "wwddi", "siUsUi", OP_MLAL_LN>; |
| +def VQDMLAL_LANE : SOpInst<"vqdmlal_lane", "wwddi", "si", OP_QDMLAL_LN>; |
| +def VMLS_LANE : IOpInst<"vmls_lane", "dddgi", |
| "siUsUifQsQiQUsQUiQf", OP_MLS_LN>; |
| -def VMLSL_LANE : SOpInst<"vmlsl_lane", "(>Q)(>Q)..I", "siUsUi", OP_MLSL_LN>; |
| -def VQDMLSL_LANE : SOpInst<"vqdmlsl_lane", "(>Q)(>Q)..I", "si", OP_QDMLSL_LN>; |
| -def VMUL_N : IOpInst<"vmul_n", "..1", "sifUsUiQsQiQfQUsQUi", OP_MUL_N>; |
| -def VMUL_LANE : IOpInst<"vmul_lane", "..qI", |
| +def VMLSL_LANE : SOpInst<"vmlsl_lane", "wwddi", "siUsUi", OP_MLSL_LN>; |
| +def VQDMLSL_LANE : SOpInst<"vqdmlsl_lane", "wwddi", "si", OP_QDMLSL_LN>; |
| +def VMUL_N : IOpInst<"vmul_n", "dds", "sifUsUiQsQiQfQUsQUi", OP_MUL_N>; |
| +def VMUL_LANE : IOpInst<"vmul_lane", "ddgi", |
| "sifUsUiQsQiQfQUsQUi", OP_MUL_LN>; |
| -def VMULL_N : SOpInst<"vmull_n", "(>Q).1", "siUsUi", OP_MULL_N>; |
| -def VMULL_LANE : SOpInst<"vmull_lane", "(>Q)..I", "siUsUi", OP_MULL_LN>; |
| -def VQDMULL_N : SOpInst<"vqdmull_n", "(>Q).1", "si", OP_QDMULL_N>; |
| -def VQDMULL_LANE : SOpInst<"vqdmull_lane", "(>Q)..I", "si", OP_QDMULL_LN>; |
| -def VQDMULH_N : SOpInst<"vqdmulh_n", "..1", "siQsQi", OP_QDMULH_N>; |
| -def VQDMULH_LANE : SOpInst<"vqdmulh_lane", "..qI", "siQsQi", OP_QDMULH_LN>; |
| -def VQRDMULH_N : SOpInst<"vqrdmulh_n", "..1", "siQsQi", OP_QRDMULH_N>; |
| -def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "..qI", "siQsQi", OP_QRDMULH_LN>; |
| +def VMULL_N : SOpInst<"vmull_n", "wds", "siUsUi", OP_MULL_N>; |
| +def VMULL_LANE : SOpInst<"vmull_lane", "wddi", "siUsUi", OP_MULL_LN>; |
| +def VQDMULL_N : SOpInst<"vqdmull_n", "wds", "si", OP_QDMULL_N>; |
| +def VQDMULL_LANE : SOpInst<"vqdmull_lane", "wddi", "si", OP_QDMULL_LN>; |
| +def VQDMULH_N : SOpInst<"vqdmulh_n", "dds", "siQsQi", OP_QDMULH_N>; |
| +def VQDMULH_LANE : SOpInst<"vqdmulh_lane", "ddgi", "siQsQi", OP_QDMULH_LN>; |
| +def VQRDMULH_N : SOpInst<"vqrdmulh_n", "dds", "siQsQi", OP_QRDMULH_N>; |
| +def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "ddgi", "siQsQi", OP_QRDMULH_LN>; |
| |
| let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" in { |
| -def VQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "...qI", "siQsQi", OP_QRDMLAH_LN>; |
| -def VQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "...qI", "siQsQi", OP_QRDMLSH_LN>; |
| +def VQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "dddgi", "siQsQi", OP_QRDMLAH_LN>; |
| +def VQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "dddgi", "siQsQi", OP_QRDMLSH_LN>; |
| } |
| |
| -def VMLA_N : IOpInst<"vmla_n", "...1", "siUsUifQsQiQUsQUiQf", OP_MLA_N>; |
| -def VMLAL_N : SOpInst<"vmlal_n", "(>Q)(>Q).1", "siUsUi", OP_MLAL_N>; |
| -def VQDMLAL_N : SOpInst<"vqdmlal_n", "(>Q)(>Q).1", "si", OP_QDMLAL_N>; |
| -def VMLS_N : IOpInst<"vmls_n", "...1", "siUsUifQsQiQUsQUiQf", OP_MLS_N>; |
| -def VMLSL_N : SOpInst<"vmlsl_n", "(>Q)(>Q).1", "siUsUi", OP_MLSL_N>; |
| -def VQDMLSL_N : SOpInst<"vqdmlsl_n", "(>Q)(>Q).1", "si", OP_QDMLSL_N>; |
| +def VMLA_N : IOpInst<"vmla_n", "ddds", "siUsUifQsQiQUsQUiQf", OP_MLA_N>; |
| +def VMLAL_N : SOpInst<"vmlal_n", "wwds", "siUsUi", OP_MLAL_N>; |
| +def VQDMLAL_N : SOpInst<"vqdmlal_n", "wwds", "si", OP_QDMLAL_N>; |
| +def VMLS_N : IOpInst<"vmls_n", "ddds", "siUsUifQsQiQUsQUiQf", OP_MLS_N>; |
| +def VMLSL_N : SOpInst<"vmlsl_n", "wwds", "siUsUi", OP_MLSL_N>; |
| +def VQDMLSL_N : SOpInst<"vqdmlsl_n", "wwds", "si", OP_QDMLSL_N>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.26 Vector Extract |
| -def VEXT : WInst<"vext", "...I", |
| +def VEXT : WInst<"vext", "dddi", |
| "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.27 Reverse vector elements |
| -def VREV64 : WOpInst<"vrev64", "..", "csiUcUsUiPcPsfQcQsQiQUcQUsQUiQPcQPsQf", |
| +def VREV64 : WOpInst<"vrev64", "dd", "csiUcUsUiPcPsfQcQsQiQUcQUsQUiQPcQPsQf", |
| OP_REV64>; |
| -def VREV32 : WOpInst<"vrev32", "..", "csUcUsPcPsQcQsQUcQUsQPcQPs", OP_REV32>; |
| -def VREV16 : WOpInst<"vrev16", "..", "cUcPcQcQUcQPc", OP_REV16>; |
| +def VREV32 : WOpInst<"vrev32", "dd", "csUcUsPcPsQcQsQUcQUsQPcQPs", OP_REV32>; |
| +def VREV16 : WOpInst<"vrev16", "dd", "cUcPcQcQUcQPc", OP_REV16>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.28 Other single operand arithmetic |
| -def VABS : SInst<"vabs", "..", "csifQcQsQiQf">; |
| -def VQABS : SInst<"vqabs", "..", "csiQcQsQi">; |
| -def VNEG : SOpInst<"vneg", "..", "csifQcQsQiQf", OP_NEG>; |
| -def VQNEG : SInst<"vqneg", "..", "csiQcQsQi">; |
| -def VCLS : SInst<"vcls", "..", "csiQcQsQi">; |
| -def VCLZ : IInst<"vclz", "..", "csiUcUsUiQcQsQiQUcQUsQUi">; |
| -def VCNT : WInst<"vcnt", "..", "UccPcQUcQcQPc">; |
| -def VRECPE : SInst<"vrecpe", "..", "fUiQfQUi">; |
| -def VRSQRTE : SInst<"vrsqrte", "..", "fUiQfQUi">; |
| +def VABS : SInst<"vabs", "dd", "csifQcQsQiQf">; |
| +def VQABS : SInst<"vqabs", "dd", "csiQcQsQi">; |
| +def VNEG : SOpInst<"vneg", "dd", "csifQcQsQiQf", OP_NEG>; |
| +def VQNEG : SInst<"vqneg", "dd", "csiQcQsQi">; |
| +def VCLS : SInst<"vcls", "dd", "csiQcQsQi">; |
| +def VCLZ : IInst<"vclz", "dd", "csiUcUsUiQcQsQiQUcQUsQUi">; |
| +def VCNT : WInst<"vcnt", "dd", "UccPcQUcQcQPc">; |
| +def VRECPE : SInst<"vrecpe", "dd", "fUiQfQUi">; |
| +def VRSQRTE : SInst<"vrsqrte", "dd", "fUiQfQUi">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.29 Logical operations |
| -def VMVN : LOpInst<"vmvn", "..", "csiUcUsUiPcQcQsQiQUcQUsQUiQPc", OP_NOT>; |
| -def VAND : LOpInst<"vand", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_AND>; |
| -def VORR : LOpInst<"vorr", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_OR>; |
| -def VEOR : LOpInst<"veor", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_XOR>; |
| -def VBIC : LOpInst<"vbic", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ANDN>; |
| -def VORN : LOpInst<"vorn", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ORN>; |
| +def VMVN : LOpInst<"vmvn", "dd", "csiUcUsUiPcQcQsQiQUcQUsQUiQPc", OP_NOT>; |
| +def VAND : LOpInst<"vand", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_AND>; |
| +def VORR : LOpInst<"vorr", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_OR>; |
| +def VEOR : LOpInst<"veor", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_XOR>; |
| +def VBIC : LOpInst<"vbic", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ANDN>; |
| +def VORN : LOpInst<"vorn", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ORN>; |
| let isHiddenLInst = 1 in |
| -def VBSL : SInst<"vbsl", ".U..", |
| +def VBSL : SInst<"vbsl", "dudd", |
| "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.30 Transposition operations |
| -def VTRN : WInst<"vtrn", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; |
| -def VZIP : WInst<"vzip", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; |
| -def VUZP : WInst<"vuzp", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; |
| +def VTRN : WInst<"vtrn", "2dd", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; |
| +def VZIP : WInst<"vzip", "2dd", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; |
| +def VUZP : WInst<"vuzp", "2dd", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // E.3.31 Vector reinterpret cast operations |
| def VREINTERPRET |
| - : NoTestOpInst<"vreinterpret", "..", |
| + : NoTestOpInst<"vreinterpret", "dd", |
| "csilUcUsUiUlhfPcPsQcQsQiQlQUcQUsQUiQUlQhQfQPcQPs", OP_REINT> { |
| let CartesianProductOfTypes = 1; |
| let ArchGuard = "!defined(__aarch64__)"; |
| @@ -599,17 +599,17 @@ def VREINTERPRET |
| // Vector fused multiply-add operations |
| |
| let ArchGuard = "defined(__ARM_FEATURE_FMA)" in { |
| - def VFMA : SInst<"vfma", "....", "fQf">; |
| - def VFMS : SOpInst<"vfms", "....", "fQf", OP_FMLS>; |
| - def FMLA_N_F32 : SOpInst<"vfma_n", "...1", "fQf", OP_FMLA_N>; |
| + def VFMA : SInst<"vfma", "dddd", "fQf">; |
| + def VFMS : SOpInst<"vfms", "dddd", "fQf", OP_FMLS>; |
| + def FMLA_N_F32 : SOpInst<"vfma_n", "ddds", "fQf", OP_FMLA_N>; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // fp16 vector operations |
| -def SCALAR_HALF_GET_LANE : IOpInst<"vget_lane", "1.I", "h", OP_SCALAR_HALF_GET_LN>; |
| -def SCALAR_HALF_SET_LANE : IOpInst<"vset_lane", ".1.I", "h", OP_SCALAR_HALF_SET_LN>; |
| -def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "1.I", "Qh", OP_SCALAR_HALF_GET_LNQ>; |
| -def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", ".1.I", "Qh", OP_SCALAR_HALF_SET_LNQ>; |
| +def SCALAR_HALF_GET_LANE : IOpInst<"vget_lane", "sdi", "h", OP_SCALAR_HALF_GET_LN>; |
| +def SCALAR_HALF_SET_LANE : IOpInst<"vset_lane", "dsdi", "h", OP_SCALAR_HALF_SET_LN>; |
| +def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "sdi", "Qh", OP_SCALAR_HALF_GET_LNQ>; |
| +def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", "dsdi", "Qh", OP_SCALAR_HALF_SET_LNQ>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // AArch64 Intrinsics |
| @@ -618,474 +618,474 @@ let ArchGuard = "defined(__aarch64__)" in { |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Load/Store |
| -def LD1 : WInst<"vld1", ".(c*!)", "dQdPlQPl">; |
| -def LD2 : WInst<"vld2", "2(c*!)", "QUlQldQdPlQPl">; |
| -def LD3 : WInst<"vld3", "3(c*!)", "QUlQldQdPlQPl">; |
| -def LD4 : WInst<"vld4", "4(c*!)", "QUlQldQdPlQPl">; |
| -def ST1 : WInst<"vst1", "v*(.!)", "dQdPlQPl">; |
| -def ST2 : WInst<"vst2", "v*(2!)", "QUlQldQdPlQPl">; |
| -def ST3 : WInst<"vst3", "v*(3!)", "QUlQldQdPlQPl">; |
| -def ST4 : WInst<"vst4", "v*(4!)", "QUlQldQdPlQPl">; |
| - |
| -def LD1_X2 : WInst<"vld1_x2", "2(c*!)", |
| +def LD1 : WInst<"vld1", "dc", "dQdPlQPl">; |
| +def LD2 : WInst<"vld2", "2c", "QUlQldQdPlQPl">; |
| +def LD3 : WInst<"vld3", "3c", "QUlQldQdPlQPl">; |
| +def LD4 : WInst<"vld4", "4c", "QUlQldQdPlQPl">; |
| +def ST1 : WInst<"vst1", "vpd", "dQdPlQPl">; |
| +def ST2 : WInst<"vst2", "vp2", "QUlQldQdPlQPl">; |
| +def ST3 : WInst<"vst3", "vp3", "QUlQldQdPlQPl">; |
| +def ST4 : WInst<"vst4", "vp4", "QUlQldQdPlQPl">; |
| + |
| +def LD1_X2 : WInst<"vld1_x2", "2c", |
| "dQdPlQPl">; |
| -def LD1_X3 : WInst<"vld1_x3", "3(c*!)", |
| +def LD1_X3 : WInst<"vld1_x3", "3c", |
| "dQdPlQPl">; |
| -def LD1_X4 : WInst<"vld1_x4", "4(c*!)", |
| +def LD1_X4 : WInst<"vld1_x4", "4c", |
| "dQdPlQPl">; |
| |
| -def ST1_X2 : WInst<"vst1_x2", "v*(2!)", "dQdPlQPl">; |
| -def ST1_X3 : WInst<"vst1_x3", "v*(3!)", "dQdPlQPl">; |
| -def ST1_X4 : WInst<"vst1_x4", "v*(4!)", "dQdPlQPl">; |
| +def ST1_X2 : WInst<"vst1_x2", "vp2", "dQdPlQPl">; |
| +def ST1_X3 : WInst<"vst1_x3", "vp3", "dQdPlQPl">; |
| +def ST1_X4 : WInst<"vst1_x4", "vp4", "dQdPlQPl">; |
| |
| -def LD1_LANE : WInst<"vld1_lane", ".(c*!).I", "dQdPlQPl">; |
| -def LD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "lUlQcQUcQPcQlQUldQdPlQPl">; |
| -def LD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "lUlQcQUcQPcQlQUldQdPlQPl">; |
| -def LD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "lUlQcQUcQPcQlQUldQdPlQPl">; |
| -def ST1_LANE : WInst<"vst1_lane", "v*(.!)I", "dQdPlQPl">; |
| -def ST2_LANE : WInst<"vst2_lane", "v*(2!)I", "lUlQcQUcQPcQlQUldQdPlQPl">; |
| -def ST3_LANE : WInst<"vst3_lane", "v*(3!)I", "lUlQcQUcQPcQlQUldQdPlQPl">; |
| -def ST4_LANE : WInst<"vst4_lane", "v*(4!)I", "lUlQcQUcQPcQlQUldQdPlQPl">; |
| +def LD1_LANE : WInst<"vld1_lane", "dcdi", "dQdPlQPl">; |
| +def LD2_LANE : WInst<"vld2_lane", "2c2i", "lUlQcQUcQPcQlQUldQdPlQPl">; |
| +def LD3_LANE : WInst<"vld3_lane", "3c3i", "lUlQcQUcQPcQlQUldQdPlQPl">; |
| +def LD4_LANE : WInst<"vld4_lane", "4c4i", "lUlQcQUcQPcQlQUldQdPlQPl">; |
| +def ST1_LANE : WInst<"vst1_lane", "vpdi", "dQdPlQPl">; |
| +def ST2_LANE : WInst<"vst2_lane", "vp2i", "lUlQcQUcQPcQlQUldQdPlQPl">; |
| +def ST3_LANE : WInst<"vst3_lane", "vp3i", "lUlQcQUcQPcQlQUldQdPlQPl">; |
| +def ST4_LANE : WInst<"vst4_lane", "vp4i", "lUlQcQUcQPcQlQUldQdPlQPl">; |
| |
| -def LD1_DUP : WInst<"vld1_dup", ".(c*!)", "dQdPlQPl">; |
| -def LD2_DUP : WInst<"vld2_dup", "2(c*!)", "dQdPlQPl">; |
| -def LD3_DUP : WInst<"vld3_dup", "3(c*!)", "dQdPlQPl">; |
| -def LD4_DUP : WInst<"vld4_dup", "4(c*!)", "dQdPlQPl">; |
| +def LD1_DUP : WInst<"vld1_dup", "dc", "dQdPlQPl">; |
| +def LD2_DUP : WInst<"vld2_dup", "2c", "dQdPlQPl">; |
| +def LD3_DUP : WInst<"vld3_dup", "3c", "dQdPlQPl">; |
| +def LD4_DUP : WInst<"vld4_dup", "4c", "dQdPlQPl">; |
| |
| -def VLDRQ : WInst<"vldrq", "1(c*!)", "Pk">; |
| -def VSTRQ : WInst<"vstrq", "v*(1!)", "Pk">; |
| +def VLDRQ : WInst<"vldrq", "sc", "Pk">; |
| +def VSTRQ : WInst<"vstrq", "vps", "Pk">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Addition |
| -def ADD : IOpInst<"vadd", "...", "dQd", OP_ADD>; |
| +def ADD : IOpInst<"vadd", "ddd", "dQd", OP_ADD>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Subtraction |
| -def SUB : IOpInst<"vsub", "...", "dQd", OP_SUB>; |
| +def SUB : IOpInst<"vsub", "ddd", "dQd", OP_SUB>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Multiplication |
| -def MUL : IOpInst<"vmul", "...", "dQd", OP_MUL>; |
| -def MLA : IOpInst<"vmla", "....", "dQd", OP_MLA>; |
| -def MLS : IOpInst<"vmls", "....", "dQd", OP_MLS>; |
| +def MUL : IOpInst<"vmul", "ddd", "dQd", OP_MUL>; |
| +def MLA : IOpInst<"vmla", "dddd", "dQd", OP_MLA>; |
| +def MLS : IOpInst<"vmls", "dddd", "dQd", OP_MLS>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Multiplication Extended |
| -def MULX : SInst<"vmulx", "...", "fdQfQd">; |
| +def MULX : SInst<"vmulx", "ddd", "fdQfQd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Division |
| -def FDIV : IOpInst<"vdiv", "...", "fdQfQd", OP_DIV>; |
| +def FDIV : IOpInst<"vdiv", "ddd", "fdQfQd", OP_DIV>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Vector fused multiply-add operations |
| -def FMLA : SInst<"vfma", "....", "dQd">; |
| -def FMLS : SOpInst<"vfms", "....", "dQd", OP_FMLS>; |
| +def FMLA : SInst<"vfma", "dddd", "dQd">; |
| +def FMLS : SOpInst<"vfms", "dddd", "dQd", OP_FMLS>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // MUL, MLA, MLS, FMA, FMS definitions with scalar argument |
| -def VMUL_N_A64 : IOpInst<"vmul_n", "..1", "Qd", OP_MUL_N>; |
| +def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>; |
| |
| -def FMLA_N : SOpInst<"vfma_n", "...1", "dQd", OP_FMLA_N>; |
| -def FMLS_N : SOpInst<"vfms_n", "...1", "fdQfQd", OP_FMLS_N>; |
| +def FMLA_N : SOpInst<"vfma_n", "ddds", "dQd", OP_FMLA_N>; |
| +def FMLS_N : SOpInst<"vfms_n", "ddds", "fdQfQd", OP_FMLS_N>; |
| |
| -def MLA_N : SOpInst<"vmla_n", "...1", "Qd", OP_MLA_N>; |
| -def MLS_N : SOpInst<"vmls_n", "...1", "Qd", OP_MLS_N>; |
| +def MLA_N : SOpInst<"vmla_n", "ddds", "Qd", OP_MLA_N>; |
| +def MLS_N : SOpInst<"vmls_n", "ddds", "Qd", OP_MLS_N>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Logical operations |
| -def BSL : SInst<"vbsl", ".U..", "dPlQdQPl">; |
| +def BSL : SInst<"vbsl", "dudd", "dPlQdQPl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Absolute Difference |
| -def ABD : SInst<"vabd", "...", "dQd">; |
| +def ABD : SInst<"vabd", "ddd", "dQd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // saturating absolute/negate |
| -def ABS : SInst<"vabs", "..", "dQdlQl">; |
| -def QABS : SInst<"vqabs", "..", "lQl">; |
| -def NEG : SOpInst<"vneg", "..", "dlQdQl", OP_NEG>; |
| -def QNEG : SInst<"vqneg", "..", "lQl">; |
| +def ABS : SInst<"vabs", "dd", "dQdlQl">; |
| +def QABS : SInst<"vqabs", "dd", "lQl">; |
| +def NEG : SOpInst<"vneg", "dd", "dlQdQl", OP_NEG>; |
| +def QNEG : SInst<"vqneg", "dd", "lQl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Signed Saturating Accumulated of Unsigned Value |
| -def SUQADD : SInst<"vuqadd", "..U", "csilQcQsQiQl">; |
| +def SUQADD : SInst<"vuqadd", "ddu", "csilQcQsQiQl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Unsigned Saturating Accumulated of Signed Value |
| -def USQADD : SInst<"vsqadd", "..S", "UcUsUiUlQUcQUsQUiQUl">; |
| +def USQADD : SInst<"vsqadd", "ddx", "UcUsUiUlQUcQUsQUiQUl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Reciprocal/Sqrt |
| -def FRECPS : IInst<"vrecps", "...", "dQd">; |
| -def FRSQRTS : IInst<"vrsqrts", "...", "dQd">; |
| -def FRECPE : SInst<"vrecpe", "..", "dQd">; |
| -def FRSQRTE : SInst<"vrsqrte", "..", "dQd">; |
| -def FSQRT : SInst<"vsqrt", "..", "fdQfQd">; |
| +def FRECPS : IInst<"vrecps", "ddd", "dQd">; |
| +def FRSQRTS : IInst<"vrsqrts", "ddd", "dQd">; |
| +def FRECPE : SInst<"vrecpe", "dd", "dQd">; |
| +def FRSQRTE : SInst<"vrsqrte", "dd", "dQd">; |
| +def FSQRT : SInst<"vsqrt", "dd", "fdQfQd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // bitwise reverse |
| -def RBIT : IInst<"vrbit", "..", "cUcPcQcQUcQPc">; |
| +def RBIT : IInst<"vrbit", "dd", "cUcPcQcQUcQPc">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Integer extract and narrow to high |
| -def XTN2 : SOpInst<"vmovn_high", "(<Q)<Q", "silUsUiUl", OP_XTN>; |
| +def XTN2 : SOpInst<"vmovn_high", "qhk", "silUsUiUl", OP_XTN>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Signed integer saturating extract and unsigned narrow to high |
| -def SQXTUN2 : SOpInst<"vqmovun_high", "(<U)(<q).", "HsHiHl", OP_SQXTUN>; |
| +def SQXTUN2 : SOpInst<"vqmovun_high", "emd", "HsHiHl", OP_SQXTUN>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Integer saturating extract and narrow to high |
| -def QXTN2 : SOpInst<"vqmovn_high", "(<Q)<Q", "silUsUiUl", OP_QXTN>; |
| +def QXTN2 : SOpInst<"vqmovn_high", "qhk", "silUsUiUl", OP_QXTN>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Converting vectors |
| |
| -def VCVT_F32_F64 : SInst<"vcvt_f32_f64", "(<q).", "Qd">; |
| -def VCVT_F64_F32 : SInst<"vcvt_f64_f32", "(>Q).", "f">; |
| +def VCVT_F32_F64 : SInst<"vcvt_f32_f64", "md", "Qd">; |
| +def VCVT_F64_F32 : SInst<"vcvt_f64_f32", "wd", "f">; |
| |
| -def VCVT_S64 : SInst<"vcvt_s64", "S.", "dQd">; |
| -def VCVT_U64 : SInst<"vcvt_u64", "U.", "dQd">; |
| -def VCVT_F64 : SInst<"vcvt_f64", "F(.!)", "lUlQlQUl">; |
| +def VCVT_S64 : SInst<"vcvt_s64", "xd", "dQd">; |
| +def VCVT_U64 : SInst<"vcvt_u64", "ud", "dQd">; |
| +def VCVT_F64 : SInst<"vcvt_f64", "Fd", "lUlQlQUl">; |
| |
| -def VCVT_HIGH_F16_F32 : SOpInst<"vcvt_high_f16", "<(<q!)Q", "Hf", OP_VCVT_NA_HI_F16>; |
| -def VCVT_HIGH_F32_F16 : SOpInst<"vcvt_high_f32", "(>Q)(Q!)", "h", OP_VCVT_EX_HI_F32>; |
| -def VCVT_HIGH_F32_F64 : SOpInst<"vcvt_high_f32", "(<Q)(F<!)Q", "d", OP_VCVT_NA_HI_F32>; |
| -def VCVT_HIGH_F64_F32 : SOpInst<"vcvt_high_f64", "(>Q)(Q!)", "f", OP_VCVT_EX_HI_F64>; |
| +def VCVT_HIGH_F16_F32 : SOpInst<"vcvt_high_f16", "hmj", "Hf", OP_VCVT_NA_HI_F16>; |
| +def VCVT_HIGH_F32_F16 : SOpInst<"vcvt_high_f32", "wk", "h", OP_VCVT_EX_HI_F32>; |
| +def VCVT_HIGH_F32_F64 : SOpInst<"vcvt_high_f32", "qfj", "d", OP_VCVT_NA_HI_F32>; |
| +def VCVT_HIGH_F64_F32 : SOpInst<"vcvt_high_f64", "wj", "f", OP_VCVT_EX_HI_F64>; |
| |
| -def VCVTX_F32_F64 : SInst<"vcvtx_f32", "(F<)(Q!)", "d">; |
| -def VCVTX_HIGH_F32_F64 : SOpInst<"vcvtx_high_f32", "(<Q)(F<!)Q", "d", OP_VCVTX_HI>; |
| +def VCVTX_F32_F64 : SInst<"vcvtx_f32", "fj", "d">; |
| +def VCVTX_HIGH_F32_F64 : SOpInst<"vcvtx_high_f32", "qfj", "d", OP_VCVTX_HI>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Comparison |
| -def FCAGE : IInst<"vcage", "U..", "dQd">; |
| -def FCAGT : IInst<"vcagt", "U..", "dQd">; |
| -def FCALE : IInst<"vcale", "U..", "dQd">; |
| -def FCALT : IInst<"vcalt", "U..", "dQd">; |
| -def CMTST : WInst<"vtst", "U..", "lUlPlQlQUlQPl">; |
| -def CFMEQ : SOpInst<"vceq", "U..", "lUldQdQlQUlPlQPl", OP_EQ>; |
| -def CFMGE : SOpInst<"vcge", "U..", "lUldQdQlQUl", OP_GE>; |
| -def CFMLE : SOpInst<"vcle", "U..", "lUldQdQlQUl", OP_LE>; |
| -def CFMGT : SOpInst<"vcgt", "U..", "lUldQdQlQUl", OP_GT>; |
| -def CFMLT : SOpInst<"vclt", "U..", "lUldQdQlQUl", OP_LT>; |
| - |
| -def CMEQ : SInst<"vceqz", "U.", |
| +def FCAGE : IInst<"vcage", "udd", "dQd">; |
| +def FCAGT : IInst<"vcagt", "udd", "dQd">; |
| +def FCALE : IInst<"vcale", "udd", "dQd">; |
| +def FCALT : IInst<"vcalt", "udd", "dQd">; |
| +def CMTST : WInst<"vtst", "udd", "lUlPlQlQUlQPl">; |
| +def CFMEQ : SOpInst<"vceq", "udd", "lUldQdQlQUlPlQPl", OP_EQ>; |
| +def CFMGE : SOpInst<"vcge", "udd", "lUldQdQlQUl", OP_GE>; |
| +def CFMLE : SOpInst<"vcle", "udd", "lUldQdQlQUl", OP_LE>; |
| +def CFMGT : SOpInst<"vcgt", "udd", "lUldQdQlQUl", OP_GT>; |
| +def CFMLT : SOpInst<"vclt", "udd", "lUldQdQlQUl", OP_LT>; |
| + |
| +def CMEQ : SInst<"vceqz", "ud", |
| "csilfUcUsUiUlPcPsPlQcQsQiQlQfQUcQUsQUiQUlQPcQPsdQdQPl">; |
| -def CMGE : SInst<"vcgez", "U.", "csilfdQcQsQiQlQfQd">; |
| -def CMLE : SInst<"vclez", "U.", "csilfdQcQsQiQlQfQd">; |
| -def CMGT : SInst<"vcgtz", "U.", "csilfdQcQsQiQlQfQd">; |
| -def CMLT : SInst<"vcltz", "U.", "csilfdQcQsQiQlQfQd">; |
| +def CMGE : SInst<"vcgez", "ud", "csilfdQcQsQiQlQfQd">; |
| +def CMLE : SInst<"vclez", "ud", "csilfdQcQsQiQlQfQd">; |
| +def CMGT : SInst<"vcgtz", "ud", "csilfdQcQsQiQlQfQd">; |
| +def CMLT : SInst<"vcltz", "ud", "csilfdQcQsQiQlQfQd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Max/Min Integer |
| -def MAX : SInst<"vmax", "...", "dQd">; |
| -def MIN : SInst<"vmin", "...", "dQd">; |
| +def MAX : SInst<"vmax", "ddd", "dQd">; |
| +def MIN : SInst<"vmin", "ddd", "dQd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Pairwise Max/Min |
| -def MAXP : SInst<"vpmax", "...", "QcQsQiQUcQUsQUiQfQd">; |
| -def MINP : SInst<"vpmin", "...", "QcQsQiQUcQUsQUiQfQd">; |
| +def MAXP : SInst<"vpmax", "ddd", "QcQsQiQUcQUsQUiQfQd">; |
| +def MINP : SInst<"vpmin", "ddd", "QcQsQiQUcQUsQUiQfQd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Pairwise MaxNum/MinNum Floating Point |
| -def FMAXNMP : SInst<"vpmaxnm", "...", "fQfQd">; |
| -def FMINNMP : SInst<"vpminnm", "...", "fQfQd">; |
| +def FMAXNMP : SInst<"vpmaxnm", "ddd", "fQfQd">; |
| +def FMINNMP : SInst<"vpminnm", "ddd", "fQfQd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Pairwise Addition |
| -def ADDP : IInst<"vpadd", "...", "QcQsQiQlQUcQUsQUiQUlQfQd">; |
| +def ADDP : IInst<"vpadd", "ddd", "QcQsQiQlQUcQUsQUiQUlQfQd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Shifts by constant |
| let isShift = 1 in { |
| // Left shift long high |
| -def SHLL_HIGH_N : SOpInst<"vshll_high_n", ">.I", "HcHsHiHUcHUsHUi", |
| +def SHLL_HIGH_N : SOpInst<"vshll_high_n", "ndi", "HcHsHiHUcHUsHUi", |
| OP_LONG_HI>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| -def SRI_N : WInst<"vsri_n", "...I", "PlQPl">; |
| -def SLI_N : WInst<"vsli_n", "...I", "PlQPl">; |
| +def SRI_N : WInst<"vsri_n", "dddi", "PlQPl">; |
| +def SLI_N : WInst<"vsli_n", "dddi", "PlQPl">; |
| |
| // Right shift narrow high |
| -def SHRN_HIGH_N : IOpInst<"vshrn_high_n", "<(<q).I", |
| +def SHRN_HIGH_N : IOpInst<"vshrn_high_n", "hmdi", |
| "HsHiHlHUsHUiHUl", OP_NARROW_HI>; |
| -def QSHRUN_HIGH_N : SOpInst<"vqshrun_high_n", "<(<q).I", |
| +def QSHRUN_HIGH_N : SOpInst<"vqshrun_high_n", "hmdi", |
| "HsHiHl", OP_NARROW_HI>; |
| -def RSHRN_HIGH_N : IOpInst<"vrshrn_high_n", "<(<q).I", |
| +def RSHRN_HIGH_N : IOpInst<"vrshrn_high_n", "hmdi", |
| "HsHiHlHUsHUiHUl", OP_NARROW_HI>; |
| -def QRSHRUN_HIGH_N : SOpInst<"vqrshrun_high_n", "<(<q).I", |
| +def QRSHRUN_HIGH_N : SOpInst<"vqrshrun_high_n", "hmdi", |
| "HsHiHl", OP_NARROW_HI>; |
| -def QSHRN_HIGH_N : SOpInst<"vqshrn_high_n", "<(<q).I", |
| +def QSHRN_HIGH_N : SOpInst<"vqshrn_high_n", "hmdi", |
| "HsHiHlHUsHUiHUl", OP_NARROW_HI>; |
| -def QRSHRN_HIGH_N : SOpInst<"vqrshrn_high_n", "<(<q).I", |
| +def QRSHRN_HIGH_N : SOpInst<"vqrshrn_high_n", "hmdi", |
| "HsHiHlHUsHUiHUl", OP_NARROW_HI>; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Converting vectors |
| -def VMOVL_HIGH : SOpInst<"vmovl_high", ">.", "HcHsHiHUcHUsHUi", OP_MOVL_HI>; |
| +def VMOVL_HIGH : SOpInst<"vmovl_high", "nd", "HcHsHiHUcHUsHUi", OP_MOVL_HI>; |
| |
| let isVCVT_N = 1 in { |
| -def CVTF_N_F64 : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl">; |
| -def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd">; |
| -def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd">; |
| +def CVTF_N_F64 : SInst<"vcvt_n_f64", "Fdi", "lUlQlQUl">; |
| +def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "xdi", "dQd">; |
| +def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "udi", "dQd">; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // 3VDiff class using high 64-bit in operands |
| -def VADDL_HIGH : SOpInst<"vaddl_high", "(>Q)QQ", "csiUcUsUi", OP_ADDLHi>; |
| -def VADDW_HIGH : SOpInst<"vaddw_high", "(>Q)(>Q)Q", "csiUcUsUi", OP_ADDWHi>; |
| -def VSUBL_HIGH : SOpInst<"vsubl_high", "(>Q)QQ", "csiUcUsUi", OP_SUBLHi>; |
| -def VSUBW_HIGH : SOpInst<"vsubw_high", "(>Q)(>Q)Q", "csiUcUsUi", OP_SUBWHi>; |
| +def VADDL_HIGH : SOpInst<"vaddl_high", "wkk", "csiUcUsUi", OP_ADDLHi>; |
| +def VADDW_HIGH : SOpInst<"vaddw_high", "wwk", "csiUcUsUi", OP_ADDWHi>; |
| +def VSUBL_HIGH : SOpInst<"vsubl_high", "wkk", "csiUcUsUi", OP_SUBLHi>; |
| +def VSUBW_HIGH : SOpInst<"vsubw_high", "wwk", "csiUcUsUi", OP_SUBWHi>; |
| |
| -def VABDL_HIGH : SOpInst<"vabdl_high", "(>Q)QQ", "csiUcUsUi", OP_ABDLHi>; |
| -def VABAL_HIGH : SOpInst<"vabal_high", "(>Q)(>Q)QQ", "csiUcUsUi", OP_ABALHi>; |
| +def VABDL_HIGH : SOpInst<"vabdl_high", "wkk", "csiUcUsUi", OP_ABDLHi>; |
| +def VABAL_HIGH : SOpInst<"vabal_high", "wwkk", "csiUcUsUi", OP_ABALHi>; |
| |
| -def VMULL_HIGH : SOpInst<"vmull_high", "(>Q)QQ", "csiUcUsUiPc", OP_MULLHi>; |
| -def VMULL_HIGH_N : SOpInst<"vmull_high_n", "(>Q)Q1", "siUsUi", OP_MULLHi_N>; |
| -def VMLAL_HIGH : SOpInst<"vmlal_high", "(>Q)(>Q)QQ", "csiUcUsUi", OP_MLALHi>; |
| -def VMLAL_HIGH_N : SOpInst<"vmlal_high_n", "(>Q)(>Q)Q1", "siUsUi", OP_MLALHi_N>; |
| -def VMLSL_HIGH : SOpInst<"vmlsl_high", "(>Q)(>Q)QQ", "csiUcUsUi", OP_MLSLHi>; |
| -def VMLSL_HIGH_N : SOpInst<"vmlsl_high_n", "(>Q)(>Q)Q1", "siUsUi", OP_MLSLHi_N>; |
| +def VMULL_HIGH : SOpInst<"vmull_high", "wkk", "csiUcUsUiPc", OP_MULLHi>; |
| +def VMULL_HIGH_N : SOpInst<"vmull_high_n", "wks", "siUsUi", OP_MULLHi_N>; |
| +def VMLAL_HIGH : SOpInst<"vmlal_high", "wwkk", "csiUcUsUi", OP_MLALHi>; |
| +def VMLAL_HIGH_N : SOpInst<"vmlal_high_n", "wwks", "siUsUi", OP_MLALHi_N>; |
| +def VMLSL_HIGH : SOpInst<"vmlsl_high", "wwkk", "csiUcUsUi", OP_MLSLHi>; |
| +def VMLSL_HIGH_N : SOpInst<"vmlsl_high_n", "wwks", "siUsUi", OP_MLSLHi_N>; |
| |
| -def VADDHN_HIGH : SOpInst<"vaddhn_high", "(<Q)<QQ", "silUsUiUl", OP_ADDHNHi>; |
| -def VRADDHN_HIGH : SOpInst<"vraddhn_high", "(<Q)<QQ", "silUsUiUl", OP_RADDHNHi>; |
| -def VSUBHN_HIGH : SOpInst<"vsubhn_high", "(<Q)<QQ", "silUsUiUl", OP_SUBHNHi>; |
| -def VRSUBHN_HIGH : SOpInst<"vrsubhn_high", "(<Q)<QQ", "silUsUiUl", OP_RSUBHNHi>; |
| +def VADDHN_HIGH : SOpInst<"vaddhn_high", "qhkk", "silUsUiUl", OP_ADDHNHi>; |
| +def VRADDHN_HIGH : SOpInst<"vraddhn_high", "qhkk", "silUsUiUl", OP_RADDHNHi>; |
| +def VSUBHN_HIGH : SOpInst<"vsubhn_high", "qhkk", "silUsUiUl", OP_SUBHNHi>; |
| +def VRSUBHN_HIGH : SOpInst<"vrsubhn_high", "qhkk", "silUsUiUl", OP_RSUBHNHi>; |
| |
| -def VQDMULL_HIGH : SOpInst<"vqdmull_high", "(>Q)QQ", "si", OP_QDMULLHi>; |
| -def VQDMULL_HIGH_N : SOpInst<"vqdmull_high_n", "(>Q)Q1", "si", OP_QDMULLHi_N>; |
| -def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "(>Q)(>Q)QQ", "si", OP_QDMLALHi>; |
| -def VQDMLAL_HIGH_N : SOpInst<"vqdmlal_high_n", "(>Q)(>Q)Q1", "si", OP_QDMLALHi_N>; |
| -def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "(>Q)(>Q)QQ", "si", OP_QDMLSLHi>; |
| -def VQDMLSL_HIGH_N : SOpInst<"vqdmlsl_high_n", "(>Q)(>Q)Q1", "si", OP_QDMLSLHi_N>; |
| -def VMULL_P64 : SInst<"vmull", "(1>)11", "Pl">; |
| -def VMULL_HIGH_P64 : SOpInst<"vmull_high", "(1>)..", "HPl", OP_MULLHi_P64>; |
| +def VQDMULL_HIGH : SOpInst<"vqdmull_high", "wkk", "si", OP_QDMULLHi>; |
| +def VQDMULL_HIGH_N : SOpInst<"vqdmull_high_n", "wks", "si", OP_QDMULLHi_N>; |
| +def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "wwkk", "si", OP_QDMLALHi>; |
| +def VQDMLAL_HIGH_N : SOpInst<"vqdmlal_high_n", "wwks", "si", OP_QDMLALHi_N>; |
| +def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>; |
| +def VQDMLSL_HIGH_N : SOpInst<"vqdmlsl_high_n", "wwks", "si", OP_QDMLSLHi_N>; |
| +def VMULL_P64 : SInst<"vmull", "rss", "Pl">; |
| +def VMULL_HIGH_P64 : SOpInst<"vmull_high", "rdd", "HPl", OP_MULLHi_P64>; |
| |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Extract or insert element from vector |
| -def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl">; |
| -def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl">; |
| -def COPY_LANE : IOpInst<"vcopy_lane", "..I.I", |
| +def GET_LANE : IInst<"vget_lane", "sdi", "dQdPlQPl">; |
| +def SET_LANE : IInst<"vset_lane", "dsdi", "dQdPlQPl">; |
| +def COPY_LANE : IOpInst<"vcopy_lane", "ddidi", |
| "csilUcUsUiUlPcPsPlfd", OP_COPY_LN>; |
| -def COPYQ_LANE : IOpInst<"vcopy_lane", "..IqI", |
| +def COPYQ_LANE : IOpInst<"vcopy_lane", "ddigi", |
| "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>; |
| -def COPY_LANEQ : IOpInst<"vcopy_laneq", "..IQI", |
| +def COPY_LANEQ : IOpInst<"vcopy_laneq", "ddiki", |
| "csilPcPsPlUcUsUiUlfd", OP_COPY_LN>; |
| -def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "..I.I", |
| +def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "ddidi", |
| "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Set all lanes to same value |
| -def VDUP_LANE1: WOpInst<"vdup_lane", ".qI", "hdQhQdPlQPl", OP_DUP_LN>; |
| -def VDUP_LANE2: WOpInst<"vdup_laneq", ".QI", |
| +def VDUP_LANE1: WOpInst<"vdup_lane", "dgi", "hdQhQdPlQPl", OP_DUP_LN>; |
| +def VDUP_LANE2: WOpInst<"vdup_laneq", "dji", |
| "csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl", |
| OP_DUP_LN>; |
| -def DUP_N : WOpInst<"vdup_n", ".1", "dQdPlQPl", OP_DUP>; |
| -def MOV_N : WOpInst<"vmov_n", ".1", "dQdPlQPl", OP_DUP>; |
| +def DUP_N : WOpInst<"vdup_n", "ds", "dQdPlQPl", OP_DUP>; |
| +def MOV_N : WOpInst<"vmov_n", "ds", "dQdPlQPl", OP_DUP>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| -def COMBINE : NoTestOpInst<"vcombine", "Q..", "dPl", OP_CONC>; |
| +def COMBINE : NoTestOpInst<"vcombine", "kdd", "dPl", OP_CONC>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| //Initialize a vector from bit pattern |
| -def CREATE : NoTestOpInst<"vcreate", ".(IU>)", "dPl", OP_CAST> { |
| +def CREATE : NoTestOpInst<"vcreate", "dl", "dPl", OP_CAST> { |
| let BigEndianSafe = 1; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| |
| -def VMLA_LANEQ : IOpInst<"vmla_laneq", "...QI", |
| +def VMLA_LANEQ : IOpInst<"vmla_laneq", "dddji", |
| "siUsUifQsQiQUsQUiQf", OP_MLA_LN>; |
| -def VMLS_LANEQ : IOpInst<"vmls_laneq", "...QI", |
| +def VMLS_LANEQ : IOpInst<"vmls_laneq", "dddji", |
| "siUsUifQsQiQUsQUiQf", OP_MLS_LN>; |
| |
| -def VFMA_LANE : IInst<"vfma_lane", "...qI", "fdQfQd">; |
| -def VFMA_LANEQ : IInst<"vfma_laneq", "...QI", "fdQfQd"> { |
| +def VFMA_LANE : IInst<"vfma_lane", "dddgi", "fdQfQd">; |
| +def VFMA_LANEQ : IInst<"vfma_laneq", "dddji", "fdQfQd"> { |
| let isLaneQ = 1; |
| } |
| -def VFMS_LANE : IOpInst<"vfms_lane", "...qI", "fdQfQd", OP_FMS_LN>; |
| -def VFMS_LANEQ : IOpInst<"vfms_laneq", "...QI", "fdQfQd", OP_FMS_LNQ>; |
| +def VFMS_LANE : IOpInst<"vfms_lane", "dddgi", "fdQfQd", OP_FMS_LN>; |
| +def VFMS_LANEQ : IOpInst<"vfms_laneq", "dddji", "fdQfQd", OP_FMS_LNQ>; |
| |
| -def VMLAL_LANEQ : SOpInst<"vmlal_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLAL_LN>; |
| -def VMLAL_HIGH_LANE : SOpInst<"vmlal_high_lane", "(>Q)(>Q)Q.I", "siUsUi", |
| +def VMLAL_LANEQ : SOpInst<"vmlal_laneq", "wwdki", "siUsUi", OP_MLAL_LN>; |
| +def VMLAL_HIGH_LANE : SOpInst<"vmlal_high_lane", "wwkdi", "siUsUi", |
| OP_MLALHi_LN>; |
| -def VMLAL_HIGH_LANEQ : SOpInst<"vmlal_high_laneq", "(>Q)(>Q)QQI", "siUsUi", |
| +def VMLAL_HIGH_LANEQ : SOpInst<"vmlal_high_laneq", "wwkki", "siUsUi", |
| OP_MLALHi_LN>; |
| -def VMLSL_LANEQ : SOpInst<"vmlsl_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLSL_LN>; |
| -def VMLSL_HIGH_LANE : SOpInst<"vmlsl_high_lane", "(>Q)(>Q)Q.I", "siUsUi", |
| +def VMLSL_LANEQ : SOpInst<"vmlsl_laneq", "wwdki", "siUsUi", OP_MLSL_LN>; |
| +def VMLSL_HIGH_LANE : SOpInst<"vmlsl_high_lane", "wwkdi", "siUsUi", |
| OP_MLSLHi_LN>; |
| -def VMLSL_HIGH_LANEQ : SOpInst<"vmlsl_high_laneq", "(>Q)(>Q)QQI", "siUsUi", |
| +def VMLSL_HIGH_LANEQ : SOpInst<"vmlsl_high_laneq", "wwkki", "siUsUi", |
| OP_MLSLHi_LN>; |
| |
| -def VQDMLAL_LANEQ : SOpInst<"vqdmlal_laneq", "(>Q)(>Q).QI", "si", OP_QDMLAL_LN>; |
| -def VQDMLAL_HIGH_LANE : SOpInst<"vqdmlal_high_lane", "(>Q)(>Q)Q.I", "si", |
| +def VQDMLAL_LANEQ : SOpInst<"vqdmlal_laneq", "wwdki", "si", OP_QDMLAL_LN>; |
| +def VQDMLAL_HIGH_LANE : SOpInst<"vqdmlal_high_lane", "wwkdi", "si", |
| OP_QDMLALHi_LN>; |
| -def VQDMLAL_HIGH_LANEQ : SOpInst<"vqdmlal_high_laneq", "(>Q)(>Q)QQI", "si", |
| +def VQDMLAL_HIGH_LANEQ : SOpInst<"vqdmlal_high_laneq", "wwkki", "si", |
| OP_QDMLALHi_LN>; |
| -def VQDMLSL_LANEQ : SOpInst<"vqdmlsl_laneq", "(>Q)(>Q).QI", "si", OP_QDMLSL_LN>; |
| -def VQDMLSL_HIGH_LANE : SOpInst<"vqdmlsl_high_lane", "(>Q)(>Q)Q.I", "si", |
| +def VQDMLSL_LANEQ : SOpInst<"vqdmlsl_laneq", "wwdki", "si", OP_QDMLSL_LN>; |
| +def VQDMLSL_HIGH_LANE : SOpInst<"vqdmlsl_high_lane", "wwkdi", "si", |
| OP_QDMLSLHi_LN>; |
| -def VQDMLSL_HIGH_LANEQ : SOpInst<"vqdmlsl_high_laneq", "(>Q)(>Q)QQI", "si", |
| +def VQDMLSL_HIGH_LANEQ : SOpInst<"vqdmlsl_high_laneq", "wwkki", "si", |
| OP_QDMLSLHi_LN>; |
| |
| // Newly add double parameter for vmul_lane in aarch64 |
| // Note: d type is handled by SCALAR_VMUL_LANE |
| -def VMUL_LANE_A64 : IOpInst<"vmul_lane", "..qI", "Qd", OP_MUL_LN>; |
| +def VMUL_LANE_A64 : IOpInst<"vmul_lane", "ddgi", "Qd", OP_MUL_LN>; |
| |
| // Note: d type is handled by SCALAR_VMUL_LANEQ |
| -def VMUL_LANEQ : IOpInst<"vmul_laneq", "..QI", |
| +def VMUL_LANEQ : IOpInst<"vmul_laneq", "ddji", |
| "sifUsUiQsQiQUsQUiQfQd", OP_MUL_LN>; |
| -def VMULL_LANEQ : SOpInst<"vmull_laneq", "(>Q).QI", "siUsUi", OP_MULL_LN>; |
| -def VMULL_HIGH_LANE : SOpInst<"vmull_high_lane", "(>Q)Q.I", "siUsUi", |
| +def VMULL_LANEQ : SOpInst<"vmull_laneq", "wdki", "siUsUi", OP_MULL_LN>; |
| +def VMULL_HIGH_LANE : SOpInst<"vmull_high_lane", "wkdi", "siUsUi", |
| OP_MULLHi_LN>; |
| -def VMULL_HIGH_LANEQ : SOpInst<"vmull_high_laneq", "(>Q)QQI", "siUsUi", |
| +def VMULL_HIGH_LANEQ : SOpInst<"vmull_high_laneq", "wkki", "siUsUi", |
| OP_MULLHi_LN>; |
| |
| -def VQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(>Q).QI", "si", OP_QDMULL_LN>; |
| -def VQDMULL_HIGH_LANE : SOpInst<"vqdmull_high_lane", "(>Q)Q.I", "si", |
| +def VQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "wdki", "si", OP_QDMULL_LN>; |
| +def VQDMULL_HIGH_LANE : SOpInst<"vqdmull_high_lane", "wkdi", "si", |
| OP_QDMULLHi_LN>; |
| -def VQDMULL_HIGH_LANEQ : SOpInst<"vqdmull_high_laneq", "(>Q)QQI", "si", |
| +def VQDMULL_HIGH_LANEQ : SOpInst<"vqdmull_high_laneq", "wkki", "si", |
| OP_QDMULLHi_LN>; |
| |
| -def VQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "..QI", "siQsQi", OP_QDMULH_LN>; |
| -def VQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "..QI", "siQsQi", OP_QRDMULH_LN>; |
| +def VQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ddji", "siQsQi", OP_QDMULH_LN>; |
| +def VQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ddji", "siQsQi", OP_QRDMULH_LN>; |
| |
| let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in { |
| -def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN>; |
| -def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "...QI", "siQsQi", OP_QRDMLSH_LN>; |
| +def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "dddji", "siQsQi", OP_QRDMLAH_LN>; |
| +def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "dddji", "siQsQi", OP_QRDMLSH_LN>; |
| } |
| |
| // Note: d type implemented by SCALAR_VMULX_LANE |
| -def VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "fQfQd", OP_MULX_LN>; |
| +def VMULX_LANE : IOpInst<"vmulx_lane", "ddgi", "fQfQd", OP_MULX_LN>; |
| // Note: d type is implemented by SCALAR_VMULX_LANEQ |
| -def VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "fQfQd", OP_MULX_LN>; |
| +def VMULX_LANEQ : IOpInst<"vmulx_laneq", "ddji", "fQfQd", OP_MULX_LN>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Across vectors class |
| -def VADDLV : SInst<"vaddlv", "(1>).", "csiUcUsUiQcQsQiQUcQUsQUi">; |
| -def VMAXV : SInst<"vmaxv", "1.", "csifUcUsUiQcQsQiQUcQUsQUiQfQd">; |
| -def VMINV : SInst<"vminv", "1.", "csifUcUsUiQcQsQiQUcQUsQUiQfQd">; |
| -def VADDV : SInst<"vaddv", "1.", "csifUcUsUiQcQsQiQUcQUsQUiQfQdQlQUl">; |
| -def FMAXNMV : SInst<"vmaxnmv", "1.", "fQfQd">; |
| -def FMINNMV : SInst<"vminnmv", "1.", "fQfQd">; |
| +def VADDLV : SInst<"vaddlv", "rd", "csiUcUsUiQcQsQiQUcQUsQUi">; |
| +def VMAXV : SInst<"vmaxv", "sd", "csifUcUsUiQcQsQiQUcQUsQUiQfQd">; |
| +def VMINV : SInst<"vminv", "sd", "csifUcUsUiQcQsQiQUcQUsQUiQfQd">; |
| +def VADDV : SInst<"vaddv", "sd", "csifUcUsUiQcQsQiQUcQUsQUiQfQdQlQUl">; |
| +def FMAXNMV : SInst<"vmaxnmv", "sd", "fQfQd">; |
| +def FMINNMV : SInst<"vminnmv", "sd", "fQfQd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Newly added Vector Extract for f64 |
| -def VEXT_A64 : WInst<"vext", "...I", "dQdPlQPl">; |
| +def VEXT_A64 : WInst<"vext", "dddi", "dQdPlQPl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Crypto |
| let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_CRYPTO)" in { |
| -def AESE : SInst<"vaese", "...", "QUc">; |
| -def AESD : SInst<"vaesd", "...", "QUc">; |
| -def AESMC : SInst<"vaesmc", "..", "QUc">; |
| -def AESIMC : SInst<"vaesimc", "..", "QUc">; |
| - |
| -def SHA1H : SInst<"vsha1h", "11", "Ui">; |
| -def SHA1SU1 : SInst<"vsha1su1", "...", "QUi">; |
| -def SHA256SU0 : SInst<"vsha256su0", "...", "QUi">; |
| - |
| -def SHA1C : SInst<"vsha1c", "..1.", "QUi">; |
| -def SHA1P : SInst<"vsha1p", "..1.", "QUi">; |
| -def SHA1M : SInst<"vsha1m", "..1.", "QUi">; |
| -def SHA1SU0 : SInst<"vsha1su0", "....", "QUi">; |
| -def SHA256H : SInst<"vsha256h", "....", "QUi">; |
| -def SHA256H2 : SInst<"vsha256h2", "....", "QUi">; |
| -def SHA256SU1 : SInst<"vsha256su1", "....", "QUi">; |
| +def AESE : SInst<"vaese", "ddd", "QUc">; |
| +def AESD : SInst<"vaesd", "ddd", "QUc">; |
| +def AESMC : SInst<"vaesmc", "dd", "QUc">; |
| +def AESIMC : SInst<"vaesimc", "dd", "QUc">; |
| + |
| +def SHA1H : SInst<"vsha1h", "ss", "Ui">; |
| +def SHA1SU1 : SInst<"vsha1su1", "ddd", "QUi">; |
| +def SHA256SU0 : SInst<"vsha256su0", "ddd", "QUi">; |
| + |
| +def SHA1C : SInst<"vsha1c", "ddsd", "QUi">; |
| +def SHA1P : SInst<"vsha1p", "ddsd", "QUi">; |
| +def SHA1M : SInst<"vsha1m", "ddsd", "QUi">; |
| +def SHA1SU0 : SInst<"vsha1su0", "dddd", "QUi">; |
| +def SHA256H : SInst<"vsha256h", "dddd", "QUi">; |
| +def SHA256H2 : SInst<"vsha256h2", "dddd", "QUi">; |
| +def SHA256SU1 : SInst<"vsha256su1", "dddd", "QUi">; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Float -> Int conversions with explicit rounding mode |
| |
| let ArchGuard = "__ARM_ARCH >= 8" in { |
| -def FCVTNS_S32 : SInst<"vcvtn_s32", "S.", "fQf">; |
| -def FCVTNU_S32 : SInst<"vcvtn_u32", "U.", "fQf">; |
| -def FCVTPS_S32 : SInst<"vcvtp_s32", "S.", "fQf">; |
| -def FCVTPU_S32 : SInst<"vcvtp_u32", "U.", "fQf">; |
| -def FCVTMS_S32 : SInst<"vcvtm_s32", "S.", "fQf">; |
| -def FCVTMU_S32 : SInst<"vcvtm_u32", "U.", "fQf">; |
| -def FCVTAS_S32 : SInst<"vcvta_s32", "S.", "fQf">; |
| -def FCVTAU_S32 : SInst<"vcvta_u32", "U.", "fQf">; |
| +def FCVTNS_S32 : SInst<"vcvtn_s32", "xd", "fQf">; |
| +def FCVTNU_S32 : SInst<"vcvtn_u32", "ud", "fQf">; |
| +def FCVTPS_S32 : SInst<"vcvtp_s32", "xd", "fQf">; |
| +def FCVTPU_S32 : SInst<"vcvtp_u32", "ud", "fQf">; |
| +def FCVTMS_S32 : SInst<"vcvtm_s32", "xd", "fQf">; |
| +def FCVTMU_S32 : SInst<"vcvtm_u32", "ud", "fQf">; |
| +def FCVTAS_S32 : SInst<"vcvta_s32", "xd", "fQf">; |
| +def FCVTAU_S32 : SInst<"vcvta_u32", "ud", "fQf">; |
| } |
| |
| let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__)" in { |
| -def FCVTNS_S64 : SInst<"vcvtn_s64", "S.", "dQd">; |
| -def FCVTNU_S64 : SInst<"vcvtn_u64", "U.", "dQd">; |
| -def FCVTPS_S64 : SInst<"vcvtp_s64", "S.", "dQd">; |
| -def FCVTPU_S64 : SInst<"vcvtp_u64", "U.", "dQd">; |
| -def FCVTMS_S64 : SInst<"vcvtm_s64", "S.", "dQd">; |
| -def FCVTMU_S64 : SInst<"vcvtm_u64", "U.", "dQd">; |
| -def FCVTAS_S64 : SInst<"vcvta_s64", "S.", "dQd">; |
| -def FCVTAU_S64 : SInst<"vcvta_u64", "U.", "dQd">; |
| +def FCVTNS_S64 : SInst<"vcvtn_s64", "xd", "dQd">; |
| +def FCVTNU_S64 : SInst<"vcvtn_u64", "ud", "dQd">; |
| +def FCVTPS_S64 : SInst<"vcvtp_s64", "xd", "dQd">; |
| +def FCVTPU_S64 : SInst<"vcvtp_u64", "ud", "dQd">; |
| +def FCVTMS_S64 : SInst<"vcvtm_s64", "xd", "dQd">; |
| +def FCVTMU_S64 : SInst<"vcvtm_u64", "ud", "dQd">; |
| +def FCVTAS_S64 : SInst<"vcvta_s64", "xd", "dQd">; |
| +def FCVTAU_S64 : SInst<"vcvta_u64", "ud", "dQd">; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Round to Integral |
| |
| let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in { |
| -def FRINTN_S32 : SInst<"vrndn", "..", "fQf">; |
| -def FRINTA_S32 : SInst<"vrnda", "..", "fQf">; |
| -def FRINTP_S32 : SInst<"vrndp", "..", "fQf">; |
| -def FRINTM_S32 : SInst<"vrndm", "..", "fQf">; |
| -def FRINTX_S32 : SInst<"vrndx", "..", "fQf">; |
| -def FRINTZ_S32 : SInst<"vrnd", "..", "fQf">; |
| -def FRINTI_S32 : SInst<"vrndi", "..", "fQf">; |
| +def FRINTN_S32 : SInst<"vrndn", "dd", "fQf">; |
| +def FRINTA_S32 : SInst<"vrnda", "dd", "fQf">; |
| +def FRINTP_S32 : SInst<"vrndp", "dd", "fQf">; |
| +def FRINTM_S32 : SInst<"vrndm", "dd", "fQf">; |
| +def FRINTX_S32 : SInst<"vrndx", "dd", "fQf">; |
| +def FRINTZ_S32 : SInst<"vrnd", "dd", "fQf">; |
| +def FRINTI_S32 : SInst<"vrndi", "dd", "fQf">; |
| } |
| |
| let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in { |
| -def FRINTN_S64 : SInst<"vrndn", "..", "dQd">; |
| -def FRINTA_S64 : SInst<"vrnda", "..", "dQd">; |
| -def FRINTP_S64 : SInst<"vrndp", "..", "dQd">; |
| -def FRINTM_S64 : SInst<"vrndm", "..", "dQd">; |
| -def FRINTX_S64 : SInst<"vrndx", "..", "dQd">; |
| -def FRINTZ_S64 : SInst<"vrnd", "..", "dQd">; |
| -def FRINTI_S64 : SInst<"vrndi", "..", "dQd">; |
| +def FRINTN_S64 : SInst<"vrndn", "dd", "dQd">; |
| +def FRINTA_S64 : SInst<"vrnda", "dd", "dQd">; |
| +def FRINTP_S64 : SInst<"vrndp", "dd", "dQd">; |
| +def FRINTM_S64 : SInst<"vrndm", "dd", "dQd">; |
| +def FRINTX_S64 : SInst<"vrndx", "dd", "dQd">; |
| +def FRINTZ_S64 : SInst<"vrnd", "dd", "dQd">; |
| +def FRINTI_S64 : SInst<"vrndi", "dd", "dQd">; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // MaxNum/MinNum Floating Point |
| |
| let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_NUMERIC_MAXMIN)" in { |
| -def FMAXNM_S32 : SInst<"vmaxnm", "...", "fQf">; |
| -def FMINNM_S32 : SInst<"vminnm", "...", "fQf">; |
| +def FMAXNM_S32 : SInst<"vmaxnm", "ddd", "fQf">; |
| +def FMINNM_S32 : SInst<"vminnm", "ddd", "fQf">; |
| } |
| |
| let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_NUMERIC_MAXMIN)" in { |
| -def FMAXNM_S64 : SInst<"vmaxnm", "...", "dQd">; |
| -def FMINNM_S64 : SInst<"vminnm", "...", "dQd">; |
| +def FMAXNM_S64 : SInst<"vmaxnm", "ddd", "dQd">; |
| +def FMINNM_S64 : SInst<"vminnm", "ddd", "dQd">; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Permutation |
| -def VTRN1 : SOpInst<"vtrn1", "...", |
| +def VTRN1 : SOpInst<"vtrn1", "ddd", |
| "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN1>; |
| -def VZIP1 : SOpInst<"vzip1", "...", |
| +def VZIP1 : SOpInst<"vzip1", "ddd", |
| "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP1>; |
| -def VUZP1 : SOpInst<"vuzp1", "...", |
| +def VUZP1 : SOpInst<"vuzp1", "ddd", |
| "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP1>; |
| -def VTRN2 : SOpInst<"vtrn2", "...", |
| +def VTRN2 : SOpInst<"vtrn2", "ddd", |
| "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN2>; |
| -def VZIP2 : SOpInst<"vzip2", "...", |
| +def VZIP2 : SOpInst<"vzip2", "ddd", |
| "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP2>; |
| -def VUZP2 : SOpInst<"vuzp2", "...", |
| +def VUZP2 : SOpInst<"vuzp2", "ddd", |
| "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP2>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Table lookup |
| let InstName = "vtbl" in { |
| -def VQTBL1_A64 : WInst<"vqtbl1", ".QU", "UccPcQUcQcQPc">; |
| -def VQTBL2_A64 : WInst<"vqtbl2", ".(2Q)U", "UccPcQUcQcQPc">; |
| -def VQTBL3_A64 : WInst<"vqtbl3", ".(3Q)U", "UccPcQUcQcQPc">; |
| -def VQTBL4_A64 : WInst<"vqtbl4", ".(4Q)U", "UccPcQUcQcQPc">; |
| +def VQTBL1_A64 : WInst<"vqtbl1", "dju", "UccPcQUcQcQPc">; |
| +def VQTBL2_A64 : WInst<"vqtbl2", "dBu", "UccPcQUcQcQPc">; |
| +def VQTBL3_A64 : WInst<"vqtbl3", "dCu", "UccPcQUcQcQPc">; |
| +def VQTBL4_A64 : WInst<"vqtbl4", "dDu", "UccPcQUcQcQPc">; |
| } |
| let InstName = "vtbx" in { |
| -def VQTBX1_A64 : WInst<"vqtbx1", "..QU", "UccPcQUcQcQPc">; |
| -def VQTBX2_A64 : WInst<"vqtbx2", "..(2Q)U", "UccPcQUcQcQPc">; |
| -def VQTBX3_A64 : WInst<"vqtbx3", "..(3Q)U", "UccPcQUcQcQPc">; |
| -def VQTBX4_A64 : WInst<"vqtbx4", "..(4Q)U", "UccPcQUcQcQPc">; |
| +def VQTBX1_A64 : WInst<"vqtbx1", "ddju", "UccPcQUcQcQPc">; |
| +def VQTBX2_A64 : WInst<"vqtbx2", "ddBu", "UccPcQUcQcQPc">; |
| +def VQTBX3_A64 : WInst<"vqtbx3", "ddCu", "UccPcQUcQcQPc">; |
| +def VQTBX4_A64 : WInst<"vqtbx4", "ddDu", "UccPcQUcQcQPc">; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| @@ -1095,7 +1095,7 @@ def VQTBX4_A64 : WInst<"vqtbx4", "..(4Q)U", "UccPcQUcQcQPc">; |
| // itself during generation so, unlike all other intrinsics, this one should |
| // include *all* types, not just additional ones. |
| def VVREINTERPRET |
| - : NoTestOpInst<"vreinterpret", "..", |
| + : NoTestOpInst<"vreinterpret", "dd", |
| "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", OP_REINT> { |
| let CartesianProductOfTypes = 1; |
| let BigEndianSafe = 1; |
| @@ -1107,332 +1107,332 @@ def VVREINTERPRET |
| // Scalar Arithmetic |
| |
| // Scalar Addition |
| -def SCALAR_ADD : SInst<"vadd", "111", "SlSUl">; |
| +def SCALAR_ADD : SInst<"vadd", "sss", "SlSUl">; |
| // Scalar Saturating Add |
| -def SCALAR_QADD : SInst<"vqadd", "111", "ScSsSiSlSUcSUsSUiSUl">; |
| +def SCALAR_QADD : SInst<"vqadd", "sss", "ScSsSiSlSUcSUsSUiSUl">; |
| |
| // Scalar Subtraction |
| -def SCALAR_SUB : SInst<"vsub", "111", "SlSUl">; |
| +def SCALAR_SUB : SInst<"vsub", "sss", "SlSUl">; |
| // Scalar Saturating Sub |
| -def SCALAR_QSUB : SInst<"vqsub", "111", "ScSsSiSlSUcSUsSUiSUl">; |
| +def SCALAR_QSUB : SInst<"vqsub", "sss", "ScSsSiSlSUcSUsSUiSUl">; |
| |
| let InstName = "vmov" in { |
| -def VGET_HIGH_A64 : NoTestOpInst<"vget_high", ".Q", "dPl", OP_HI>; |
| -def VGET_LOW_A64 : NoTestOpInst<"vget_low", ".Q", "dPl", OP_LO>; |
| +def VGET_HIGH_A64 : NoTestOpInst<"vget_high", "dk", "dPl", OP_HI>; |
| +def VGET_LOW_A64 : NoTestOpInst<"vget_low", "dk", "dPl", OP_LO>; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Shift |
| // Scalar Shift Left |
| -def SCALAR_SHL: SInst<"vshl", "111", "SlSUl">; |
| +def SCALAR_SHL: SInst<"vshl", "sss", "SlSUl">; |
| // Scalar Saturating Shift Left |
| -def SCALAR_QSHL: SInst<"vqshl", "111", "ScSsSiSlSUcSUsSUiSUl">; |
| +def SCALAR_QSHL: SInst<"vqshl", "sss", "ScSsSiSlSUcSUsSUiSUl">; |
| // Scalar Saturating Rounding Shift Left |
| -def SCALAR_QRSHL: SInst<"vqrshl", "111", "ScSsSiSlSUcSUsSUiSUl">; |
| +def SCALAR_QRSHL: SInst<"vqrshl", "sss", "ScSsSiSlSUcSUsSUiSUl">; |
| // Scalar Shift Rounding Left |
| -def SCALAR_RSHL: SInst<"vrshl", "111", "SlSUl">; |
| +def SCALAR_RSHL: SInst<"vrshl", "sss", "SlSUl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Shift (Immediate) |
| let isScalarShift = 1 in { |
| // Signed/Unsigned Shift Right (Immediate) |
| -def SCALAR_SSHR_N: SInst<"vshr_n", "11I", "SlSUl">; |
| +def SCALAR_SSHR_N: SInst<"vshr_n", "ssi", "SlSUl">; |
| // Signed/Unsigned Rounding Shift Right (Immediate) |
| -def SCALAR_SRSHR_N: SInst<"vrshr_n", "11I", "SlSUl">; |
| +def SCALAR_SRSHR_N: SInst<"vrshr_n", "ssi", "SlSUl">; |
| |
| // Signed/Unsigned Shift Right and Accumulate (Immediate) |
| -def SCALAR_SSRA_N: SInst<"vsra_n", "111I", "SlSUl">; |
| +def SCALAR_SSRA_N: SInst<"vsra_n", "sssi", "SlSUl">; |
| // Signed/Unsigned Rounding Shift Right and Accumulate (Immediate) |
| -def SCALAR_SRSRA_N: SInst<"vrsra_n", "111I", "SlSUl">; |
| +def SCALAR_SRSRA_N: SInst<"vrsra_n", "sssi", "SlSUl">; |
| |
| // Shift Left (Immediate) |
| -def SCALAR_SHL_N: SInst<"vshl_n", "11I", "SlSUl">; |
| +def SCALAR_SHL_N: SInst<"vshl_n", "ssi", "SlSUl">; |
| // Signed/Unsigned Saturating Shift Left (Immediate) |
| -def SCALAR_SQSHL_N: SInst<"vqshl_n", "11I", "ScSsSiSlSUcSUsSUiSUl">; |
| +def SCALAR_SQSHL_N: SInst<"vqshl_n", "ssi", "ScSsSiSlSUcSUsSUiSUl">; |
| // Signed Saturating Shift Left Unsigned (Immediate) |
| -def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "11I", "ScSsSiSl">; |
| +def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "ssi", "ScSsSiSl">; |
| |
| // Shift Right And Insert (Immediate) |
| -def SCALAR_SRI_N: SInst<"vsri_n", "111I", "SlSUl">; |
| +def SCALAR_SRI_N: SInst<"vsri_n", "sssi", "SlSUl">; |
| // Shift Left And Insert (Immediate) |
| -def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl">; |
| +def SCALAR_SLI_N: SInst<"vsli_n", "sssi", "SlSUl">; |
| |
| let isScalarNarrowShift = 1 in { |
| // Signed/Unsigned Saturating Shift Right Narrow (Immediate) |
| - def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl">; |
| + def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "zsi", "SsSiSlSUsSUiSUl">; |
| // Signed/Unsigned Saturating Rounded Shift Right Narrow (Immediate) |
| - def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl">; |
| + def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "zsi", "SsSiSlSUsSUiSUl">; |
| // Signed Saturating Shift Right Unsigned Narrow (Immediate) |
| - def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "(1<)1I", "SsSiSl">; |
| + def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "zsi", "SsSiSl">; |
| // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate) |
| - def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<)1I", "SsSiSl">; |
| + def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "zsi", "SsSiSl">; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Signed/Unsigned Fixed-point Convert To Floating-Point (Immediate) |
| -def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "(1F)(1!)I", "SiSUi">; |
| -def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "(1F)(1!)I", "SlSUl">; |
| +def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "ysi", "SiSUi">; |
| +def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "osi", "SlSUl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Floating-point Convert To Signed/Unsigned Fixed-point (Immediate) |
| -def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "(1S)1I", "Sf">; |
| -def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "(1U)1I", "Sf">; |
| -def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "(1S)1I", "Sd">; |
| -def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "(1U)1I", "Sd">; |
| +def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "$si", "Sf">; |
| +def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "bsi", "Sf">; |
| +def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "$si", "Sd">; |
| +def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "bsi", "Sd">; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Floating-point Round to Integral |
| let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in { |
| -def SCALAR_FRINTN_S32 : SInst<"vrndn", "11", "Sf">; |
| +def SCALAR_FRINTN_S32 : SInst<"vrndn", "ss", "Sf">; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Reduce Pairwise Addition (Scalar and Floating Point) |
| -def SCALAR_ADDP : SInst<"vpadd", "1.", "SfSHlSHdSHUl">; |
| +def SCALAR_ADDP : SInst<"vpadd", "sd", "SfSHlSHdSHUl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Reduce Floating Point Pairwise Max/Min |
| -def SCALAR_FMAXP : SInst<"vpmax", "1.", "SfSQd">; |
| +def SCALAR_FMAXP : SInst<"vpmax", "sd", "SfSQd">; |
| |
| -def SCALAR_FMINP : SInst<"vpmin", "1.", "SfSQd">; |
| +def SCALAR_FMINP : SInst<"vpmin", "sd", "SfSQd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Reduce Floating Point Pairwise maxNum/minNum |
| -def SCALAR_FMAXNMP : SInst<"vpmaxnm", "1.", "SfSQd">; |
| -def SCALAR_FMINNMP : SInst<"vpminnm", "1.", "SfSQd">; |
| +def SCALAR_FMAXNMP : SInst<"vpmaxnm", "sd", "SfSQd">; |
| +def SCALAR_FMINNMP : SInst<"vpminnm", "sd", "SfSQd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Integer Saturating Doubling Multiply Half High |
| -def SCALAR_SQDMULH : SInst<"vqdmulh", "111", "SsSi">; |
| +def SCALAR_SQDMULH : SInst<"vqdmulh", "sss", "SsSi">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Integer Saturating Rounding Doubling Multiply Half High |
| -def SCALAR_SQRDMULH : SInst<"vqrdmulh", "111", "SsSi">; |
| +def SCALAR_SQRDMULH : SInst<"vqrdmulh", "sss", "SsSi">; |
| |
| let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in { |
| //////////////////////////////////////////////////////////////////////////////// |
| // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half |
| -def SCALAR_SQRDMLAH : SOpInst<"vqrdmlah", "1111", "SsSi", OP_QRDMLAH>; |
| +def SCALAR_SQRDMLAH : SOpInst<"vqrdmlah", "ssss", "SsSi", OP_QRDMLAH>; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Signed Saturating Rounding Doubling Multiply Subtract Returning High Half |
| -def SCALAR_SQRDMLSH : SOpInst<"vqrdmlsh", "1111", "SsSi", OP_QRDMLSH>; |
| +def SCALAR_SQRDMLSH : SOpInst<"vqrdmlsh", "ssss", "SsSi", OP_QRDMLSH>; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Floating-point Multiply Extended |
| -def SCALAR_FMULX : IInst<"vmulx", "111", "SfSd">; |
| +def SCALAR_FMULX : IInst<"vmulx", "sss", "SfSd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Floating-point Reciprocal Step |
| -def SCALAR_FRECPS : IInst<"vrecps", "111", "SfSd">; |
| +def SCALAR_FRECPS : IInst<"vrecps", "sss", "SfSd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Floating-point Reciprocal Square Root Step |
| -def SCALAR_FRSQRTS : IInst<"vrsqrts", "111", "SfSd">; |
| +def SCALAR_FRSQRTS : IInst<"vrsqrts", "sss", "SfSd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Signed Integer Convert To Floating-point |
| -def SCALAR_SCVTFS : SInst<"vcvt_f32", "(1F)(1!)", "Si">; |
| -def SCALAR_SCVTFD : SInst<"vcvt_f64", "(1F)(1!)", "Sl">; |
| +def SCALAR_SCVTFS : SInst<"vcvt_f32", "ys", "Si">; |
| +def SCALAR_SCVTFD : SInst<"vcvt_f64", "os", "Sl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Unsigned Integer Convert To Floating-point |
| -def SCALAR_UCVTFS : SInst<"vcvt_f32", "(1F)(1!)", "SUi">; |
| -def SCALAR_UCVTFD : SInst<"vcvt_f64", "(1F)(1!)", "SUl">; |
| +def SCALAR_UCVTFS : SInst<"vcvt_f32", "ys", "SUi">; |
| +def SCALAR_UCVTFD : SInst<"vcvt_f64", "os", "SUl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Floating-point Converts |
| -def SCALAR_FCVTXN : IInst<"vcvtx_f32", "(1F<)(1!)", "Sd">; |
| -def SCALAR_FCVTNSS : SInst<"vcvtn_s32", "(1S)1", "Sf">; |
| -def SCALAR_FCVTNUS : SInst<"vcvtn_u32", "(1U)1", "Sf">; |
| -def SCALAR_FCVTNSD : SInst<"vcvtn_s64", "(1S)1", "Sd">; |
| -def SCALAR_FCVTNUD : SInst<"vcvtn_u64", "(1U)1", "Sd">; |
| -def SCALAR_FCVTMSS : SInst<"vcvtm_s32", "(1S)1", "Sf">; |
| -def SCALAR_FCVTMUS : SInst<"vcvtm_u32", "(1U)1", "Sf">; |
| -def SCALAR_FCVTMSD : SInst<"vcvtm_s64", "(1S)1", "Sd">; |
| -def SCALAR_FCVTMUD : SInst<"vcvtm_u64", "(1U)1", "Sd">; |
| -def SCALAR_FCVTASS : SInst<"vcvta_s32", "(1S)1", "Sf">; |
| -def SCALAR_FCVTAUS : SInst<"vcvta_u32", "(1U)1", "Sf">; |
| -def SCALAR_FCVTASD : SInst<"vcvta_s64", "(1S)1", "Sd">; |
| -def SCALAR_FCVTAUD : SInst<"vcvta_u64", "(1U)1", "Sd">; |
| -def SCALAR_FCVTPSS : SInst<"vcvtp_s32", "(1S)1", "Sf">; |
| -def SCALAR_FCVTPUS : SInst<"vcvtp_u32", "(1U)1", "Sf">; |
| -def SCALAR_FCVTPSD : SInst<"vcvtp_s64", "(1S)1", "Sd">; |
| -def SCALAR_FCVTPUD : SInst<"vcvtp_u64", "(1U)1", "Sd">; |
| -def SCALAR_FCVTZSS : SInst<"vcvt_s32", "(1S)1", "Sf">; |
| -def SCALAR_FCVTZUS : SInst<"vcvt_u32", "(1U)1", "Sf">; |
| -def SCALAR_FCVTZSD : SInst<"vcvt_s64", "(1S)1", "Sd">; |
| -def SCALAR_FCVTZUD : SInst<"vcvt_u64", "(1U)1", "Sd">; |
| +def SCALAR_FCVTXN : IInst<"vcvtx_f32", "ys", "Sd">; |
| +def SCALAR_FCVTNSS : SInst<"vcvtn_s32", "$s", "Sf">; |
| +def SCALAR_FCVTNUS : SInst<"vcvtn_u32", "bs", "Sf">; |
| +def SCALAR_FCVTNSD : SInst<"vcvtn_s64", "$s", "Sd">; |
| +def SCALAR_FCVTNUD : SInst<"vcvtn_u64", "bs", "Sd">; |
| +def SCALAR_FCVTMSS : SInst<"vcvtm_s32", "$s", "Sf">; |
| +def SCALAR_FCVTMUS : SInst<"vcvtm_u32", "bs", "Sf">; |
| +def SCALAR_FCVTMSD : SInst<"vcvtm_s64", "$s", "Sd">; |
| +def SCALAR_FCVTMUD : SInst<"vcvtm_u64", "bs", "Sd">; |
| +def SCALAR_FCVTASS : SInst<"vcvta_s32", "$s", "Sf">; |
| +def SCALAR_FCVTAUS : SInst<"vcvta_u32", "bs", "Sf">; |
| +def SCALAR_FCVTASD : SInst<"vcvta_s64", "$s", "Sd">; |
| +def SCALAR_FCVTAUD : SInst<"vcvta_u64", "bs", "Sd">; |
| +def SCALAR_FCVTPSS : SInst<"vcvtp_s32", "$s", "Sf">; |
| +def SCALAR_FCVTPUS : SInst<"vcvtp_u32", "bs", "Sf">; |
| +def SCALAR_FCVTPSD : SInst<"vcvtp_s64", "$s", "Sd">; |
| +def SCALAR_FCVTPUD : SInst<"vcvtp_u64", "bs", "Sd">; |
| +def SCALAR_FCVTZSS : SInst<"vcvt_s32", "$s", "Sf">; |
| +def SCALAR_FCVTZUS : SInst<"vcvt_u32", "bs", "Sf">; |
| +def SCALAR_FCVTZSD : SInst<"vcvt_s64", "$s", "Sd">; |
| +def SCALAR_FCVTZUD : SInst<"vcvt_u64", "bs", "Sd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Floating-point Reciprocal Estimate |
| -def SCALAR_FRECPE : IInst<"vrecpe", "11", "SfSd">; |
| +def SCALAR_FRECPE : IInst<"vrecpe", "ss", "SfSd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Floating-point Reciprocal Exponent |
| -def SCALAR_FRECPX : IInst<"vrecpx", "11", "SfSd">; |
| +def SCALAR_FRECPX : IInst<"vrecpx", "ss", "SfSd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Floating-point Reciprocal Square Root Estimate |
| -def SCALAR_FRSQRTE : IInst<"vrsqrte", "11", "SfSd">; |
| +def SCALAR_FRSQRTE : IInst<"vrsqrte", "ss", "SfSd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Integer Comparison |
| -def SCALAR_CMEQ : SInst<"vceq", "111", "SlSUl">; |
| -def SCALAR_CMEQZ : SInst<"vceqz", "11", "SlSUl">; |
| -def SCALAR_CMGE : SInst<"vcge", "111", "Sl">; |
| -def SCALAR_CMGEZ : SInst<"vcgez", "11", "Sl">; |
| -def SCALAR_CMHS : SInst<"vcge", "111", "SUl">; |
| -def SCALAR_CMLE : SInst<"vcle", "111", "SlSUl">; |
| -def SCALAR_CMLEZ : SInst<"vclez", "11", "Sl">; |
| -def SCALAR_CMLT : SInst<"vclt", "111", "SlSUl">; |
| -def SCALAR_CMLTZ : SInst<"vcltz", "11", "Sl">; |
| -def SCALAR_CMGT : SInst<"vcgt", "111", "Sl">; |
| -def SCALAR_CMGTZ : SInst<"vcgtz", "11", "Sl">; |
| -def SCALAR_CMHI : SInst<"vcgt", "111", "SUl">; |
| -def SCALAR_CMTST : SInst<"vtst", "111", "SlSUl">; |
| +def SCALAR_CMEQ : SInst<"vceq", "sss", "SlSUl">; |
| +def SCALAR_CMEQZ : SInst<"vceqz", "ss", "SlSUl">; |
| +def SCALAR_CMGE : SInst<"vcge", "sss", "Sl">; |
| +def SCALAR_CMGEZ : SInst<"vcgez", "ss", "Sl">; |
| +def SCALAR_CMHS : SInst<"vcge", "sss", "SUl">; |
| +def SCALAR_CMLE : SInst<"vcle", "sss", "SlSUl">; |
| +def SCALAR_CMLEZ : SInst<"vclez", "ss", "Sl">; |
| +def SCALAR_CMLT : SInst<"vclt", "sss", "SlSUl">; |
| +def SCALAR_CMLTZ : SInst<"vcltz", "ss", "Sl">; |
| +def SCALAR_CMGT : SInst<"vcgt", "sss", "Sl">; |
| +def SCALAR_CMGTZ : SInst<"vcgtz", "ss", "Sl">; |
| +def SCALAR_CMHI : SInst<"vcgt", "sss", "SUl">; |
| +def SCALAR_CMTST : SInst<"vtst", "sss", "SlSUl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Floating-point Comparison |
| -def SCALAR_FCMEQ : IInst<"vceq", "(1U)11", "SfSd">; |
| -def SCALAR_FCMEQZ : IInst<"vceqz", "(1U)1", "SfSd">; |
| -def SCALAR_FCMGE : IInst<"vcge", "(1U)11", "SfSd">; |
| -def SCALAR_FCMGEZ : IInst<"vcgez", "(1U)1", "SfSd">; |
| -def SCALAR_FCMGT : IInst<"vcgt", "(1U)11", "SfSd">; |
| -def SCALAR_FCMGTZ : IInst<"vcgtz", "(1U)1", "SfSd">; |
| -def SCALAR_FCMLE : IInst<"vcle", "(1U)11", "SfSd">; |
| -def SCALAR_FCMLEZ : IInst<"vclez", "(1U)1", "SfSd">; |
| -def SCALAR_FCMLT : IInst<"vclt", "(1U)11", "SfSd">; |
| -def SCALAR_FCMLTZ : IInst<"vcltz", "(1U)1", "SfSd">; |
| +def SCALAR_FCMEQ : IInst<"vceq", "bss", "SfSd">; |
| +def SCALAR_FCMEQZ : IInst<"vceqz", "bs", "SfSd">; |
| +def SCALAR_FCMGE : IInst<"vcge", "bss", "SfSd">; |
| +def SCALAR_FCMGEZ : IInst<"vcgez", "bs", "SfSd">; |
| +def SCALAR_FCMGT : IInst<"vcgt", "bss", "SfSd">; |
| +def SCALAR_FCMGTZ : IInst<"vcgtz", "bs", "SfSd">; |
| +def SCALAR_FCMLE : IInst<"vcle", "bss", "SfSd">; |
| +def SCALAR_FCMLEZ : IInst<"vclez", "bs", "SfSd">; |
| +def SCALAR_FCMLT : IInst<"vclt", "bss", "SfSd">; |
| +def SCALAR_FCMLTZ : IInst<"vcltz", "bs", "SfSd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Floating-point Absolute Compare Mask Greater Than Or Equal |
| -def SCALAR_FACGE : IInst<"vcage", "(1U)11", "SfSd">; |
| -def SCALAR_FACLE : IInst<"vcale", "(1U)11", "SfSd">; |
| +def SCALAR_FACGE : IInst<"vcage", "bss", "SfSd">; |
| +def SCALAR_FACLE : IInst<"vcale", "bss", "SfSd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Floating-point Absolute Compare Mask Greater Than |
| -def SCALAR_FACGT : IInst<"vcagt", "(1U)11", "SfSd">; |
| -def SCALAR_FACLT : IInst<"vcalt", "(1U)11", "SfSd">; |
| +def SCALAR_FACGT : IInst<"vcagt", "bss", "SfSd">; |
| +def SCALAR_FACLT : IInst<"vcalt", "bss", "SfSd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Absolute Value |
| -def SCALAR_ABS : SInst<"vabs", "11", "Sl">; |
| +def SCALAR_ABS : SInst<"vabs", "ss", "Sl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Absolute Difference |
| -def SCALAR_ABD : IInst<"vabd", "111", "SfSd">; |
| +def SCALAR_ABD : IInst<"vabd", "sss", "SfSd">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Signed Saturating Absolute Value |
| -def SCALAR_SQABS : SInst<"vqabs", "11", "ScSsSiSl">; |
| +def SCALAR_SQABS : SInst<"vqabs", "ss", "ScSsSiSl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Negate |
| -def SCALAR_NEG : SInst<"vneg", "11", "Sl">; |
| +def SCALAR_NEG : SInst<"vneg", "ss", "Sl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Signed Saturating Negate |
| -def SCALAR_SQNEG : SInst<"vqneg", "11", "ScSsSiSl">; |
| +def SCALAR_SQNEG : SInst<"vqneg", "ss", "ScSsSiSl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Signed Saturating Accumulated of Unsigned Value |
| -def SCALAR_SUQADD : SInst<"vuqadd", "11(1U)", "ScSsSiSl">; |
| +def SCALAR_SUQADD : SInst<"vuqadd", "ssb", "ScSsSiSl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Unsigned Saturating Accumulated of Signed Value |
| -def SCALAR_USQADD : SInst<"vsqadd", "11(1S)", "SUcSUsSUiSUl">; |
| +def SCALAR_USQADD : SInst<"vsqadd", "ss$", "SUcSUsSUiSUl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Signed Saturating Doubling Multiply-Add Long |
| -def SCALAR_SQDMLAL : SInst<"vqdmlal", "(1>)(1>)11", "SsSi">; |
| +def SCALAR_SQDMLAL : SInst<"vqdmlal", "rrss", "SsSi">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Signed Saturating Doubling Multiply-Subtract Long |
| -def SCALAR_SQDMLSL : SInst<"vqdmlsl", "(1>)(1>)11", "SsSi">; |
| +def SCALAR_SQDMLSL : SInst<"vqdmlsl", "rrss", "SsSi">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Signed Saturating Doubling Multiply Long |
| -def SCALAR_SQDMULL : SInst<"vqdmull", "(1>)11", "SsSi">; |
| +def SCALAR_SQDMULL : SInst<"vqdmull", "rss", "SsSi">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Signed Saturating Extract Unsigned Narrow |
| -def SCALAR_SQXTUN : SInst<"vqmovun", "(1<)1", "SsSiSl">; |
| +def SCALAR_SQXTUN : SInst<"vqmovun", "zs", "SsSiSl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Signed Saturating Extract Narrow |
| -def SCALAR_SQXTN : SInst<"vqmovn", "(1<)1", "SsSiSl">; |
| +def SCALAR_SQXTN : SInst<"vqmovn", "zs", "SsSiSl">; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // Scalar Unsigned Saturating Extract Narrow |
| -def SCALAR_UQXTN : SInst<"vqmovn", "(1<)1", "SUsSUiSUl">; |
| +def SCALAR_UQXTN : SInst<"vqmovn", "zs", "SUsSUiSUl">; |
| |
| // Scalar Floating Point multiply (scalar, by element) |
| -def SCALAR_FMUL_LANE : IOpInst<"vmul_lane", "11.I", "SfSd", OP_SCALAR_MUL_LN>; |
| -def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "11QI", "SfSd", OP_SCALAR_MUL_LN>; |
| +def SCALAR_FMUL_LANE : IOpInst<"vmul_lane", "ssdi", "SfSd", OP_SCALAR_MUL_LN>; |
| +def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "ssji", "SfSd", OP_SCALAR_MUL_LN>; |
| |
| // Scalar Floating Point multiply extended (scalar, by element) |
| -def SCALAR_FMULX_LANE : IOpInst<"vmulx_lane", "11.I", "SfSd", OP_SCALAR_MULX_LN>; |
| -def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "11QI", "SfSd", OP_SCALAR_MULX_LN>; |
| +def SCALAR_FMULX_LANE : IOpInst<"vmulx_lane", "ssdi", "SfSd", OP_SCALAR_MULX_LN>; |
| +def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "ssji", "SfSd", OP_SCALAR_MULX_LN>; |
| |
| -def SCALAR_VMUL_N : IInst<"vmul_n", "..1", "d">; |
| +def SCALAR_VMUL_N : IInst<"vmul_n", "dds", "d">; |
| |
| // VMUL_LANE_A64 d type implemented using scalar mul lane |
| -def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d">; |
| +def SCALAR_VMUL_LANE : IInst<"vmul_lane", "ddgi", "d">; |
| |
| // VMUL_LANEQ d type implemented using scalar mul lane |
| -def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "..QI", "d"> { |
| +def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "ddji", "d"> { |
| let isLaneQ = 1; |
| } |
| |
| // VMULX_LANE d type implemented using scalar vmulx_lane |
| -def SCALAR_VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "d", OP_SCALAR_VMULX_LN>; |
| +def SCALAR_VMULX_LANE : IOpInst<"vmulx_lane", "ddgi", "d", OP_SCALAR_VMULX_LN>; |
| |
| // VMULX_LANEQ d type implemented using scalar vmulx_laneq |
| -def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "d", OP_SCALAR_VMULX_LNQ>; |
| +def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "ddji", "d", OP_SCALAR_VMULX_LNQ>; |
| |
| // Scalar Floating Point fused multiply-add (scalar, by element) |
| -def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd">; |
| -def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd">; |
| +def SCALAR_FMLA_LANE : IInst<"vfma_lane", "sssdi", "SfSd">; |
| +def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "sssji", "SfSd">; |
| |
| // Scalar Floating Point fused multiply-subtract (scalar, by element) |
| -def SCALAR_FMLS_LANE : IOpInst<"vfms_lane", "111.I", "SfSd", OP_FMS_LN>; |
| -def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "111QI", "SfSd", OP_FMS_LNQ>; |
| +def SCALAR_FMLS_LANE : IOpInst<"vfms_lane", "sssdi", "SfSd", OP_FMS_LN>; |
| +def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "sssji", "SfSd", OP_FMS_LNQ>; |
| |
| // Signed Saturating Doubling Multiply Long (scalar by element) |
| -def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "(1>)1.I", "SsSi", OP_SCALAR_QDMULL_LN>; |
| -def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(1>)1QI", "SsSi", OP_SCALAR_QDMULL_LN>; |
| +def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "rsdi", "SsSi", OP_SCALAR_QDMULL_LN>; |
| +def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "rsji", "SsSi", OP_SCALAR_QDMULL_LN>; |
| |
| // Signed Saturating Doubling Multiply-Add Long (scalar by element) |
| -def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi">; |
| -def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi">; |
| +def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "rrsdi", "SsSi">; |
| +def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "rrsji", "SsSi">; |
| |
| // Signed Saturating Doubling Multiply-Subtract Long (scalar by element) |
| -def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi">; |
| -def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi">; |
| +def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "rrsdi", "SsSi">; |
| +def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "rrsji", "SsSi">; |
| |
| // Scalar Integer Saturating Doubling Multiply Half High (scalar by element) |
| -def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "11.I", "SsSi", OP_SCALAR_QDMULH_LN>; |
| -def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QDMULH_LN>; |
| +def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QDMULH_LN>; |
| +def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QDMULH_LN>; |
| |
| // Scalar Integer Saturating Rounding Doubling Multiply Half High |
| -def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "11.I", "SsSi", OP_SCALAR_QRDMULH_LN>; |
| -def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QRDMULH_LN>; |
| +def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QRDMULH_LN>; |
| +def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QRDMULH_LN>; |
| |
| let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in { |
| // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half |
| -def SCALAR_SQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "111.I", "SsSi", OP_SCALAR_QRDMLAH_LN>; |
| -def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLAH_LN>; |
| +def SCALAR_SQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "sssdi", "SsSi", OP_SCALAR_QRDMLAH_LN>; |
| +def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "sssji", "SsSi", OP_SCALAR_QRDMLAH_LN>; |
| |
| // Signed Saturating Rounding Doubling Multiply Subtract Returning High Half |
| -def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "111.I", "SsSi", OP_SCALAR_QRDMLSH_LN>; |
| -def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLSH_LN>; |
| +def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "sssdi", "SsSi", OP_SCALAR_QRDMLSH_LN>; |
| +def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "sssji", "SsSi", OP_SCALAR_QRDMLSH_LN>; |
| } |
| |
| -def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; |
| -def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; |
| +def SCALAR_VDUP_LANE : IInst<"vdup_lane", "sdi", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; |
| +def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "sji", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; |
| } |
| |
| // ARMv8.2-A FP16 vector intrinsics for A32/A64. |
| @@ -1441,234 +1441,234 @@ let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in { |
| // ARMv8.2-A FP16 one-operand vector intrinsics. |
| |
| // Comparison |
| - def CMEQH : SInst<"vceqz", "U.", "hQh">; |
| - def CMGEH : SInst<"vcgez", "U.", "hQh">; |
| - def CMGTH : SInst<"vcgtz", "U.", "hQh">; |
| - def CMLEH : SInst<"vclez", "U.", "hQh">; |
| - def CMLTH : SInst<"vcltz", "U.", "hQh">; |
| + def CMEQH : SInst<"vceqz", "ud", "hQh">; |
| + def CMGEH : SInst<"vcgez", "ud", "hQh">; |
| + def CMGTH : SInst<"vcgtz", "ud", "hQh">; |
| + def CMLEH : SInst<"vclez", "ud", "hQh">; |
| + def CMLTH : SInst<"vcltz", "ud", "hQh">; |
| |
| // Vector conversion |
| - def VCVT_F16 : SInst<"vcvt_f16", "F(.!)", "sUsQsQUs">; |
| - def VCVT_S16 : SInst<"vcvt_s16", "S.", "hQh">; |
| - def VCVT_U16 : SInst<"vcvt_u16", "U.", "hQh">; |
| - def VCVTA_S16 : SInst<"vcvta_s16", "S.", "hQh">; |
| - def VCVTA_U16 : SInst<"vcvta_u16", "U.", "hQh">; |
| - def VCVTM_S16 : SInst<"vcvtm_s16", "S.", "hQh">; |
| - def VCVTM_U16 : SInst<"vcvtm_u16", "U.", "hQh">; |
| - def VCVTN_S16 : SInst<"vcvtn_s16", "S.", "hQh">; |
| - def VCVTN_U16 : SInst<"vcvtn_u16", "U.", "hQh">; |
| - def VCVTP_S16 : SInst<"vcvtp_s16", "S.", "hQh">; |
| - def VCVTP_U16 : SInst<"vcvtp_u16", "U.", "hQh">; |
| + def VCVT_F16 : SInst<"vcvt_f16", "Hd", "sUsQsQUs">; |
| + def VCVT_S16 : SInst<"vcvt_s16", "xd", "hQh">; |
| + def VCVT_U16 : SInst<"vcvt_u16", "ud", "hQh">; |
| + def VCVTA_S16 : SInst<"vcvta_s16", "xd", "hQh">; |
| + def VCVTA_U16 : SInst<"vcvta_u16", "ud", "hQh">; |
| + def VCVTM_S16 : SInst<"vcvtm_s16", "xd", "hQh">; |
| + def VCVTM_U16 : SInst<"vcvtm_u16", "ud", "hQh">; |
| + def VCVTN_S16 : SInst<"vcvtn_s16", "xd", "hQh">; |
| + def VCVTN_U16 : SInst<"vcvtn_u16", "ud", "hQh">; |
| + def VCVTP_S16 : SInst<"vcvtp_s16", "xd", "hQh">; |
| + def VCVTP_U16 : SInst<"vcvtp_u16", "ud", "hQh">; |
| |
| // Vector rounding |
| let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in { |
| - def FRINTZH : SInst<"vrnd", "..", "hQh">; |
| - def FRINTNH : SInst<"vrndn", "..", "hQh">; |
| - def FRINTAH : SInst<"vrnda", "..", "hQh">; |
| - def FRINTPH : SInst<"vrndp", "..", "hQh">; |
| - def FRINTMH : SInst<"vrndm", "..", "hQh">; |
| - def FRINTXH : SInst<"vrndx", "..", "hQh">; |
| + def FRINTZH : SInst<"vrnd", "dd", "hQh">; |
| + def FRINTNH : SInst<"vrndn", "dd", "hQh">; |
| + def FRINTAH : SInst<"vrnda", "dd", "hQh">; |
| + def FRINTPH : SInst<"vrndp", "dd", "hQh">; |
| + def FRINTMH : SInst<"vrndm", "dd", "hQh">; |
| + def FRINTXH : SInst<"vrndx", "dd", "hQh">; |
| } |
| |
| // Misc. |
| - def VABSH : SInst<"vabs", "..", "hQh">; |
| - def VNEGH : SOpInst<"vneg", "..", "hQh", OP_NEG>; |
| - def VRECPEH : SInst<"vrecpe", "..", "hQh">; |
| - def FRSQRTEH : SInst<"vrsqrte", "..", "hQh">; |
| + def VABSH : SInst<"vabs", "dd", "hQh">; |
| + def VNEGH : SOpInst<"vneg", "dd", "hQh", OP_NEG>; |
| + def VRECPEH : SInst<"vrecpe", "dd", "hQh">; |
| + def FRSQRTEH : SInst<"vrsqrte", "dd", "hQh">; |
| |
| // ARMv8.2-A FP16 two-operands vector intrinsics. |
| |
| // Misc. |
| - def VADDH : SOpInst<"vadd", "...", "hQh", OP_ADD>; |
| - def VABDH : SInst<"vabd", "...", "hQh">; |
| - def VSUBH : SOpInst<"vsub", "...", "hQh", OP_SUB>; |
| + def VADDH : SOpInst<"vadd", "ddd", "hQh", OP_ADD>; |
| + def VABDH : SInst<"vabd", "ddd", "hQh">; |
| + def VSUBH : SOpInst<"vsub", "ddd", "hQh", OP_SUB>; |
| |
| // Comparison |
| let InstName = "vacge" in { |
| - def VCAGEH : SInst<"vcage", "U..", "hQh">; |
| - def VCALEH : SInst<"vcale", "U..", "hQh">; |
| + def VCAGEH : SInst<"vcage", "udd", "hQh">; |
| + def VCALEH : SInst<"vcale", "udd", "hQh">; |
| } |
| let InstName = "vacgt" in { |
| - def VCAGTH : SInst<"vcagt", "U..", "hQh">; |
| - def VCALTH : SInst<"vcalt", "U..", "hQh">; |
| + def VCAGTH : SInst<"vcagt", "udd", "hQh">; |
| + def VCALTH : SInst<"vcalt", "udd", "hQh">; |
| } |
| - def VCEQH : SOpInst<"vceq", "U..", "hQh", OP_EQ>; |
| - def VCGEH : SOpInst<"vcge", "U..", "hQh", OP_GE>; |
| - def VCGTH : SOpInst<"vcgt", "U..", "hQh", OP_GT>; |
| + def VCEQH : SOpInst<"vceq", "udd", "hQh", OP_EQ>; |
| + def VCGEH : SOpInst<"vcge", "udd", "hQh", OP_GE>; |
| + def VCGTH : SOpInst<"vcgt", "udd", "hQh", OP_GT>; |
| let InstName = "vcge" in |
| - def VCLEH : SOpInst<"vcle", "U..", "hQh", OP_LE>; |
| + def VCLEH : SOpInst<"vcle", "udd", "hQh", OP_LE>; |
| let InstName = "vcgt" in |
| - def VCLTH : SOpInst<"vclt", "U..", "hQh", OP_LT>; |
| + def VCLTH : SOpInst<"vclt", "udd", "hQh", OP_LT>; |
| |
| // Vector conversion |
| let isVCVT_N = 1 in { |
| - def VCVT_N_F16 : SInst<"vcvt_n_f16", "F(.!)I", "sUsQsQUs">; |
| - def VCVT_N_S16 : SInst<"vcvt_n_s16", "S.I", "hQh">; |
| - def VCVT_N_U16 : SInst<"vcvt_n_u16", "U.I", "hQh">; |
| + def VCVT_N_F16 : SInst<"vcvt_n_f16", "Hdi", "sUsQsQUs">; |
| + def VCVT_N_S16 : SInst<"vcvt_n_s16", "xdi", "hQh">; |
| + def VCVT_N_U16 : SInst<"vcvt_n_u16", "udi", "hQh">; |
| } |
| |
| // Max/Min |
| - def VMAXH : SInst<"vmax", "...", "hQh">; |
| - def VMINH : SInst<"vmin", "...", "hQh">; |
| + def VMAXH : SInst<"vmax", "ddd", "hQh">; |
| + def VMINH : SInst<"vmin", "ddd", "hQh">; |
| let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_NUMERIC_MAXMIN) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in { |
| - def FMAXNMH : SInst<"vmaxnm", "...", "hQh">; |
| - def FMINNMH : SInst<"vminnm", "...", "hQh">; |
| + def FMAXNMH : SInst<"vmaxnm", "ddd", "hQh">; |
| + def FMINNMH : SInst<"vminnm", "ddd", "hQh">; |
| } |
| |
| // Multiplication/Division |
| - def VMULH : SOpInst<"vmul", "...", "hQh", OP_MUL>; |
| + def VMULH : SOpInst<"vmul", "ddd", "hQh", OP_MUL>; |
| |
| // Pairwise addition |
| - def VPADDH : SInst<"vpadd", "...", "h">; |
| + def VPADDH : SInst<"vpadd", "ddd", "h">; |
| |
| // Pairwise Max/Min |
| - def VPMAXH : SInst<"vpmax", "...", "h">; |
| - def VPMINH : SInst<"vpmin", "...", "h">; |
| + def VPMAXH : SInst<"vpmax", "ddd", "h">; |
| + def VPMINH : SInst<"vpmin", "ddd", "h">; |
| |
| // Reciprocal/Sqrt |
| - def VRECPSH : SInst<"vrecps", "...", "hQh">; |
| - def VRSQRTSH : SInst<"vrsqrts", "...", "hQh">; |
| + def VRECPSH : SInst<"vrecps", "ddd", "hQh">; |
| + def VRSQRTSH : SInst<"vrsqrts", "ddd", "hQh">; |
| |
| // ARMv8.2-A FP16 three-operands vector intrinsics. |
| |
| // Vector fused multiply-add operations |
| - def VFMAH : SInst<"vfma", "....", "hQh">; |
| - def VFMSH : SOpInst<"vfms", "....", "hQh", OP_FMLS>; |
| + def VFMAH : SInst<"vfma", "dddd", "hQh">; |
| + def VFMSH : SOpInst<"vfms", "dddd", "hQh", OP_FMLS>; |
| |
| // ARMv8.2-A FP16 lane vector intrinsics. |
| |
| // Mul lane |
| - def VMUL_LANEH : IOpInst<"vmul_lane", "..qI", "hQh", OP_MUL_LN>; |
| - def VMUL_NH : IOpInst<"vmul_n", "..1", "hQh", OP_MUL_N>; |
| + def VMUL_LANEH : IOpInst<"vmul_lane", "ddgi", "hQh", OP_MUL_LN>; |
| + def VMUL_NH : IOpInst<"vmul_n", "dds", "hQh", OP_MUL_N>; |
| |
| // Data processing intrinsics - section 5 |
| |
| // Logical operations |
| let isHiddenLInst = 1 in |
| - def VBSLH : SInst<"vbsl", ".U..", "hQh">; |
| + def VBSLH : SInst<"vbsl", "dudd", "hQh">; |
| |
| // Transposition operations |
| - def VZIPH : WInst<"vzip", "2..", "hQh">; |
| - def VUZPH : WInst<"vuzp", "2..", "hQh">; |
| - def VTRNH : WInst<"vtrn", "2..", "hQh">; |
| + def VZIPH : WInst<"vzip", "2dd", "hQh">; |
| + def VUZPH : WInst<"vuzp", "2dd", "hQh">; |
| + def VTRNH : WInst<"vtrn", "2dd", "hQh">; |
| |
| |
| let ArchGuard = "!defined(__aarch64__)" in { |
| // Set all lanes to same value. |
| // Already implemented prior to ARMv8.2-A. |
| - def VMOV_NH : WOpInst<"vmov_n", ".1", "hQh", OP_DUP>; |
| - def VDUP_NH : WOpInst<"vdup_n", ".1", "hQh", OP_DUP>; |
| - def VDUP_LANE1H : WOpInst<"vdup_lane", ".qI", "hQh", OP_DUP_LN>; |
| + def VMOV_NH : WOpInst<"vmov_n", "ds", "hQh", OP_DUP>; |
| + def VDUP_NH : WOpInst<"vdup_n", "ds", "hQh", OP_DUP>; |
| + def VDUP_LANE1H : WOpInst<"vdup_lane", "dgi", "hQh", OP_DUP_LN>; |
| } |
| |
| // Vector Extract |
| - def VEXTH : WInst<"vext", "...I", "hQh">; |
| + def VEXTH : WInst<"vext", "dddi", "hQh">; |
| |
| // Reverse vector elements |
| - def VREV64H : WOpInst<"vrev64", "..", "hQh", OP_REV64>; |
| + def VREV64H : WOpInst<"vrev64", "dd", "hQh", OP_REV64>; |
| } |
| |
| // ARMv8.2-A FP16 vector intrinsics for A64 only. |
| let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarch64__)" in { |
| |
| // Vector rounding |
| - def FRINTIH : SInst<"vrndi", "..", "hQh">; |
| + def FRINTIH : SInst<"vrndi", "dd", "hQh">; |
| |
| // Misc. |
| - def FSQRTH : SInst<"vsqrt", "..", "hQh">; |
| + def FSQRTH : SInst<"vsqrt", "dd", "hQh">; |
| |
| // Multiplication/Division |
| - def MULXH : SInst<"vmulx", "...", "hQh">; |
| - def FDIVH : IOpInst<"vdiv", "...", "hQh", OP_DIV>; |
| + def MULXH : SInst<"vmulx", "ddd", "hQh">; |
| + def FDIVH : IOpInst<"vdiv", "ddd", "hQh", OP_DIV>; |
| |
| // Pairwise addition |
| - def VPADDH1 : SInst<"vpadd", "...", "Qh">; |
| + def VPADDH1 : SInst<"vpadd", "ddd", "Qh">; |
| |
| // Pairwise Max/Min |
| - def VPMAXH1 : SInst<"vpmax", "...", "Qh">; |
| - def VPMINH1 : SInst<"vpmin", "...", "Qh">; |
| + def VPMAXH1 : SInst<"vpmax", "ddd", "Qh">; |
| + def VPMINH1 : SInst<"vpmin", "ddd", "Qh">; |
| |
| // Pairwise MaxNum/MinNum |
| - def FMAXNMPH : SInst<"vpmaxnm", "...", "hQh">; |
| - def FMINNMPH : SInst<"vpminnm", "...", "hQh">; |
| + def FMAXNMPH : SInst<"vpmaxnm", "ddd", "hQh">; |
| + def FMINNMPH : SInst<"vpminnm", "ddd", "hQh">; |
| |
| // ARMv8.2-A FP16 lane vector intrinsics. |
| |
| // FMA lane |
| - def VFMA_LANEH : IInst<"vfma_lane", "...qI", "hQh">; |
| - def VFMA_LANEQH : IInst<"vfma_laneq", "...QI", "hQh">; |
| + def VFMA_LANEH : IInst<"vfma_lane", "dddgi", "hQh">; |
| + def VFMA_LANEQH : IInst<"vfma_laneq", "dddji", "hQh">; |
| |
| // FMA lane with scalar argument |
| - def FMLA_NH : SOpInst<"vfma_n", "...1", "hQh", OP_FMLA_N>; |
| + def FMLA_NH : SOpInst<"vfma_n", "ddds", "hQh", OP_FMLA_N>; |
| // Scalar floating point fused multiply-add (scalar, by element) |
| - def SCALAR_FMLA_LANEH : IInst<"vfma_lane", "111.I", "Sh">; |
| - def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh">; |
| + def SCALAR_FMLA_LANEH : IInst<"vfma_lane", "sssdi", "Sh">; |
| + def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "sssji", "Sh">; |
| |
| // FMS lane |
| - def VFMS_LANEH : IOpInst<"vfms_lane", "...qI", "hQh", OP_FMS_LN>; |
| - def VFMS_LANEQH : IOpInst<"vfms_laneq", "...QI", "hQh", OP_FMS_LNQ>; |
| + def VFMS_LANEH : IOpInst<"vfms_lane", "dddgi", "hQh", OP_FMS_LN>; |
| + def VFMS_LANEQH : IOpInst<"vfms_laneq", "dddji", "hQh", OP_FMS_LNQ>; |
| // FMS lane with scalar argument |
| - def FMLS_NH : SOpInst<"vfms_n", "...1", "hQh", OP_FMLS_N>; |
| + def FMLS_NH : SOpInst<"vfms_n", "ddds", "hQh", OP_FMLS_N>; |
| // Scalar floating foint fused multiply-subtract (scalar, by element) |
| - def SCALAR_FMLS_LANEH : IOpInst<"vfms_lane", "111.I", "Sh", OP_FMS_LN>; |
| - def SCALAR_FMLS_LANEQH : IOpInst<"vfms_laneq", "111QI", "Sh", OP_FMS_LNQ>; |
| + def SCALAR_FMLS_LANEH : IOpInst<"vfms_lane", "sssdi", "Sh", OP_FMS_LN>; |
| + def SCALAR_FMLS_LANEQH : IOpInst<"vfms_laneq", "sssji", "Sh", OP_FMS_LNQ>; |
| |
| // Mul lane |
| - def VMUL_LANEQH : IOpInst<"vmul_laneq", "..QI", "hQh", OP_MUL_LN>; |
| + def VMUL_LANEQH : IOpInst<"vmul_laneq", "ddji", "hQh", OP_MUL_LN>; |
| // Scalar floating point multiply (scalar, by element) |
| - def SCALAR_FMUL_LANEH : IOpInst<"vmul_lane", "11.I", "Sh", OP_SCALAR_MUL_LN>; |
| - def SCALAR_FMUL_LANEQH : IOpInst<"vmul_laneq", "11QI", "Sh", OP_SCALAR_MUL_LN>; |
| + def SCALAR_FMUL_LANEH : IOpInst<"vmul_lane", "ssdi", "Sh", OP_SCALAR_MUL_LN>; |
| + def SCALAR_FMUL_LANEQH : IOpInst<"vmul_laneq", "ssji", "Sh", OP_SCALAR_MUL_LN>; |
| |
| // Mulx lane |
| - def VMULX_LANEH : IOpInst<"vmulx_lane", "..qI", "hQh", OP_MULX_LN>; |
| - def VMULX_LANEQH : IOpInst<"vmulx_laneq", "..QI", "hQh", OP_MULX_LN>; |
| - def VMULX_NH : IOpInst<"vmulx_n", "..1", "hQh", OP_MULX_N>; |
| + def VMULX_LANEH : IOpInst<"vmulx_lane", "ddgi", "hQh", OP_MULX_LN>; |
| + def VMULX_LANEQH : IOpInst<"vmulx_laneq", "ddji", "hQh", OP_MULX_LN>; |
| + def VMULX_NH : IOpInst<"vmulx_n", "dds", "hQh", OP_MULX_N>; |
| // Scalar floating point mulx (scalar, by element) |
| - def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh">; |
| - def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh">; |
| + def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "ssdi", "Sh">; |
| + def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "ssji", "Sh">; |
| |
| // ARMv8.2-A FP16 reduction vector intrinsics. |
| - def VMAXVH : SInst<"vmaxv", "1.", "hQh">; |
| - def VMINVH : SInst<"vminv", "1.", "hQh">; |
| - def FMAXNMVH : SInst<"vmaxnmv", "1.", "hQh">; |
| - def FMINNMVH : SInst<"vminnmv", "1.", "hQh">; |
| + def VMAXVH : SInst<"vmaxv", "sd", "hQh">; |
| + def VMINVH : SInst<"vminv", "sd", "hQh">; |
| + def FMAXNMVH : SInst<"vmaxnmv", "sd", "hQh">; |
| + def FMINNMVH : SInst<"vminnmv", "sd", "hQh">; |
| |
| // Permutation |
| - def VTRN1H : SOpInst<"vtrn1", "...", "hQh", OP_TRN1>; |
| - def VZIP1H : SOpInst<"vzip1", "...", "hQh", OP_ZIP1>; |
| - def VUZP1H : SOpInst<"vuzp1", "...", "hQh", OP_UZP1>; |
| - def VTRN2H : SOpInst<"vtrn2", "...", "hQh", OP_TRN2>; |
| - def VZIP2H : SOpInst<"vzip2", "...", "hQh", OP_ZIP2>; |
| - def VUZP2H : SOpInst<"vuzp2", "...", "hQh", OP_UZP2>; |
| - |
| - def SCALAR_VDUP_LANEH : IInst<"vdup_lane", "1.I", "Sh">; |
| - def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh">; |
| + def VTRN1H : SOpInst<"vtrn1", "ddd", "hQh", OP_TRN1>; |
| + def VZIP1H : SOpInst<"vzip1", "ddd", "hQh", OP_ZIP1>; |
| + def VUZP1H : SOpInst<"vuzp1", "ddd", "hQh", OP_UZP1>; |
| + def VTRN2H : SOpInst<"vtrn2", "ddd", "hQh", OP_TRN2>; |
| + def VZIP2H : SOpInst<"vzip2", "ddd", "hQh", OP_ZIP2>; |
| + def VUZP2H : SOpInst<"vuzp2", "ddd", "hQh", OP_UZP2>; |
| + |
| + def SCALAR_VDUP_LANEH : IInst<"vdup_lane", "sdi", "Sh">; |
| + def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "sji", "Sh">; |
| } |
| |
| // v8.2-A dot product instructions. |
| let ArchGuard = "defined(__ARM_FEATURE_DOTPROD)" in { |
| - def DOT : SInst<"vdot", "..(<<)(<<)", "iQiUiQUi">; |
| - def DOT_LANE : SOpInst<"vdot_lane", "..(<<)(<<q)I", "iUiQiQUi", OP_DOT_LN>; |
| + def DOT : SInst<"vdot", "dd88", "iQiUiQUi">; |
| + def DOT_LANE : SOpInst<"vdot_lane", "dd87i", "iUiQiQUi", OP_DOT_LN>; |
| } |
| let ArchGuard = "defined(__ARM_FEATURE_DOTPROD) && defined(__aarch64__)" in { |
| // Variants indexing into a 128-bit vector are A64 only. |
| - def UDOT_LANEQ : SOpInst<"vdot_laneq", "..(<<)(<<Q)I", "iUiQiQUi", OP_DOT_LNQ>; |
| + def UDOT_LANEQ : SOpInst<"vdot_laneq", "dd89i", "iUiQiQUi", OP_DOT_LNQ>; |
| } |
| |
| // v8.2-A FP16 fused multiply-add long instructions. |
| let ArchGuard = "defined(__ARM_FEATURE_FP16FML) && defined(__aarch64__)" in { |
| - def VFMLAL_LOW : SInst<"vfmlal_low", ">>..", "hQh">; |
| - def VFMLSL_LOW : SInst<"vfmlsl_low", ">>..", "hQh">; |
| - def VFMLAL_HIGH : SInst<"vfmlal_high", ">>..", "hQh">; |
| - def VFMLSL_HIGH : SInst<"vfmlsl_high", ">>..", "hQh">; |
| - |
| - def VFMLAL_LANE_LOW : SOpInst<"vfmlal_lane_low", "(F>)(F>)F(Fq)I", "hQh", OP_FMLAL_LN>; |
| - def VFMLSL_LANE_LOW : SOpInst<"vfmlsl_lane_low", "(F>)(F>)F(Fq)I", "hQh", OP_FMLSL_LN>; |
| - def VFMLAL_LANE_HIGH : SOpInst<"vfmlal_lane_high", "(F>)(F>)F(Fq)I", "hQh", OP_FMLAL_LN_Hi>; |
| - def VFMLSL_LANE_HIGH : SOpInst<"vfmlsl_lane_high", "(F>)(F>)F(Fq)I", "hQh", OP_FMLSL_LN_Hi>; |
| - |
| - def VFMLAL_LANEQ_LOW : SOpInst<"vfmlal_laneq_low", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN>; |
| - def VFMLSL_LANEQ_LOW : SOpInst<"vfmlsl_laneq_low", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN>; |
| - def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN_Hi>; |
| - def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN_Hi>; |
| + def VFMLAL_LOW : SInst<"vfmlal_low", "nndd", "hQh">; |
| + def VFMLSL_LOW : SInst<"vfmlsl_low", "nndd", "hQh">; |
| + def VFMLAL_HIGH : SInst<"vfmlal_high", "nndd", "hQh">; |
| + def VFMLSL_HIGH : SInst<"vfmlsl_high", "nndd", "hQh">; |
| + |
| + def VFMLAL_LANE_LOW : SOpInst<"vfmlal_lane_low", "ffH0i", "hQh", OP_FMLAL_LN>; |
| + def VFMLSL_LANE_LOW : SOpInst<"vfmlsl_lane_low", "ffH0i", "hQh", OP_FMLSL_LN>; |
| + def VFMLAL_LANE_HIGH : SOpInst<"vfmlal_lane_high", "ffH0i", "hQh", OP_FMLAL_LN_Hi>; |
| + def VFMLSL_LANE_HIGH : SOpInst<"vfmlsl_lane_high", "ffH0i", "hQh", OP_FMLSL_LN_Hi>; |
| + |
| + def VFMLAL_LANEQ_LOW : SOpInst<"vfmlal_laneq_low", "ffH1i", "hQh", OP_FMLAL_LN>; |
| + def VFMLSL_LANEQ_LOW : SOpInst<"vfmlsl_laneq_low", "ffH1i", "hQh", OP_FMLSL_LN>; |
| + def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "ffH1i", "hQh", OP_FMLAL_LN_Hi>; |
| + def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "ffH1i", "hQh", OP_FMLSL_LN_Hi>; |
| } |
| diff --git a/clang/include/clang/Basic/arm_neon_incl.td b/clang/include/clang/Basic/arm_neon_incl.td |
| index 28b00d162a0..984ed787037 100644 |
| --- a/clang/include/clang/Basic/arm_neon_incl.td |
| +++ b/clang/include/clang/Basic/arm_neon_incl.td |
| @@ -198,8 +198,10 @@ def OP_UNAVAILABLE : Operation { |
| // |
| // The prototype is a string that defines the return type of the intrinsic |
| // and the type of each argument. The return type and every argument gets a |
| -// set of "modifiers" that can change in some way the "base type" of the |
| -// intrinsic. |
| +// "modifier" that can change in some way the "base type" of the intrinsic. |
| +// |
| +// The modifier 'd' means "default" and does not modify the base type in any |
| +// way. The available modifiers are given below. |
| // |
| // Typespecs |
| // --------- |
| @@ -224,34 +226,41 @@ def OP_UNAVAILABLE : Operation { |
| // ------------------- |
| // prototype: return (arg, arg, ...) |
| // |
| -// Each type modifier is either a single character, or a group surrounded by |
| -// parentheses. |
| -// |
| -// .: default |
| -// v: change to void category. |
| -// S: change to signed integer category. |
| -// U: change to unsigned integer category. |
| -// F: change to floating category. |
| -// P: change to polynomial category. |
| -// p: change polynomial to equivalent integer category. Otherwise nop. |
| -// |
| -// >: double element width (vector size unchanged). |
| -// <: half element width (vector size unchanged). |
| -// |
| -// 1: change to scalar. |
| -// 2: change to struct of two vectors. |
| -// 3: change to struct of three vectors. |
| -// 4: change to struct of four vectors. |
| -// |
| -// *: make a pointer argument. |
| -// c: make a constant argument (for pointers). |
| -// |
| -// Q: force 128-bit width. |
| -// q: force 64-bit width. |
| -// |
| -// I: make 32-bit signed scalar immediate |
| -// !: make this the key type passed to CGBuiltin.cpp in a polymorphic call. |
| - |
| +// v: void |
| +// t: best-fit integer (int/poly args) |
| +// x: signed integer (int/float args) |
| +// u: unsigned integer (int/float args) |
| +// f: float (int args) |
| +// F: double (int args) |
| +// H: half (int args) |
| +// 0: half (int args), ignore 'Q' size modifier. |
| +// 1: half (int args), force 'Q' size modifier. |
| +// d: default |
| +// g: default, ignore 'Q' size modifier. |
| +// j: default, force 'Q' size modifier. |
| +// w: double width elements, same num elts |
| +// n: double width elements, half num elts |
| +// h: half width elements, double num elts |
| +// q: half width elements, quad num elts |
| +// e: half width elements, double num elts, unsigned |
| +// m: half width elements, same num elts |
| +// i: constant int |
| +// l: constant uint64 |
| +// s: scalar of element type |
| +// z: scalar of half width element type, signed |
| +// r: scalar of double width element type, signed |
| +// b: scalar of unsigned integer/long type (int/float args) |
| +// $: scalar of signed integer/long type (int/float args) |
| +// y: scalar of float |
| +// o: scalar of double |
| +// k: default elt width, double num elts |
| +// 2,3,4: array of default vectors |
| +// B,C,D: array of default elts, force 'Q' size modifier. |
| +// p: pointer type |
| +// c: const pointer type |
| +// 7: vector of 8-bit elements, ignore 'Q' size modifier |
| +// 8: vector of 8-bit elements, same width as default type |
| +// 9: vector of 8-bit elements, force 'Q' size modifier |
| |
| // Every intrinsic subclasses Inst. |
| class Inst <string n, string p, string t, Operation o> { |
| diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c |
| index 7744b4f4a15..b29d877dd8e 100644 |
| --- a/clang/test/CodeGen/aarch64-neon-intrinsics.c |
| +++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c |
| @@ -17756,6 +17756,8 @@ float32_t test_vminnmv_f32(float32x2_t a) { |
| } |
| |
| // CHECK-LABEL: @test_vpaddq_s64( |
| +// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
| +// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> |
| // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) |
| // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8> |
| // CHECK: ret <2 x i64> [[VPADDQ_V2_I]] |
| @@ -17764,6 +17766,8 @@ int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) { |
| } |
| |
| // CHECK-LABEL: @test_vpaddq_u64( |
| +// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
| +// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> |
| // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) |
| // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8> |
| // CHECK: ret <2 x i64> [[VPADDQ_V2_I]] |
| diff --git a/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c b/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c |
| index d2e3bec4e00..836e4dbd991 100644 |
| --- a/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c |
| +++ b/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c |
| @@ -407,10 +407,12 @@ int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) { |
| } |
| |
| // CHECK-LABEL: define <1 x double> @test_vmulx_lane_f64_0() #0 { |
| -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> <double 0x3FD6304BC43AB5C2>, i32 0 |
| -// CHECK: [[VGET_LANE7:%.*]] = extractelement <1 x double> <double 0x3FEE211E215AEEF3>, i32 0 |
| +// CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double> |
| +// CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double> |
| +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0 |
| +// CHECK: [[VGET_LANE7:%.*]] = extractelement <1 x double> [[TMP1]], i32 0 |
| // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE7]]) |
| -// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> <double 0x3FD6304BC43AB5C2>, double [[VMULXD_F64_I]], i32 0 |
| +// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0 |
| // CHECK: ret <1 x double> [[VSET_LANE]] |
| float64x1_t test_vmulx_lane_f64_0() { |
| float64x1_t arg1; |
| @@ -424,11 +426,13 @@ float64x1_t test_vmulx_lane_f64_0() { |
| } |
| |
| // CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_2() #1 { |
| -// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> <double 0x3FD6304BC43AB5C2>, <1 x double> <double 0x3FEE211E215AEEF3>, <2 x i32> <i32 0, i32 1> |
| -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> <double 0x3FD6304BC43AB5C2>, i32 0 |
| +// CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double> |
| +// CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double> |
| +// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> <i32 0, i32 1> |
| +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0 |
| // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[SHUFFLE_I]], i32 1 |
| // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]]) |
| -// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> <double 0x3FD6304BC43AB5C2>, double [[VMULXD_F64_I]], i32 0 |
| +// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0 |
| // CHECK: ret <1 x double> [[VSET_LANE]] |
| float64x1_t test_vmulx_laneq_f64_2() { |
| float64x1_t arg1; |
| diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp |
| index a0f3fb2ddc0..cdf761b00c6 100644 |
| --- a/clang/utils/TableGen/NeonEmitter.cpp |
| +++ b/clang/utils/TableGen/NeonEmitter.cpp |
| @@ -161,11 +161,11 @@ public: |
| Pointer(false), ScalarForMangling(false), NoManglingQ(false), |
| Bitwidth(0), ElementBitwidth(0), NumVectors(0) {} |
| |
| - Type(TypeSpec TS, StringRef CharMods) |
| + Type(TypeSpec TS, char CharMod) |
| : TS(std::move(TS)), Kind(Void), Immediate(false), |
| Constant(false), Pointer(false), ScalarForMangling(false), |
| NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) { |
| - applyModifiers(CharMods); |
| + applyModifier(CharMod); |
| } |
| |
| /// Returns a type representing "void". |
| @@ -181,15 +181,13 @@ public: |
| bool noManglingQ() const { return NoManglingQ; } |
| |
| bool isPointer() const { return Pointer; } |
| - bool isValue() const { return !isVoid() && !isPointer(); } |
| - bool isScalar() const { return isValue() && NumVectors == 0; } |
| - bool isVector() const { return isValue() && NumVectors > 0; } |
| - bool isConstPointer() const { return Constant; } |
| bool isFloating() const { return Kind == Float; } |
| bool isInteger() const { return Kind == SInt || Kind == UInt; } |
| bool isPoly() const { return Kind == Poly; } |
| bool isSigned() const { return Kind == SInt; } |
| bool isImmediate() const { return Immediate; } |
| + bool isScalar() const { return NumVectors == 0; } |
| + bool isVector() const { return NumVectors > 0; } |
| bool isFloat() const { return isFloating() && ElementBitwidth == 32; } |
| bool isDouble() const { return isFloating() && ElementBitwidth == 64; } |
| bool isHalf() const { return isFloating() && ElementBitwidth == 16; } |
| @@ -207,11 +205,11 @@ public: |
| // Mutator functions |
| // |
| void makeUnsigned() { |
| - assert(!isVoid() && "not a potentially signed type"); |
| + assert(isInteger() && "not a potentially signed type"); |
| Kind = UInt; |
| } |
| void makeSigned() { |
| - assert(!isVoid() && "not a potentially signed type"); |
| + assert(isInteger() && "not a potentially signed type"); |
| Kind = SInt; |
| } |
| |
| @@ -269,8 +267,8 @@ private: |
| /// seen. This is needed by applyModifier as some modifiers |
| /// only take effect if the type size was changed by "Q" or "H". |
| void applyTypespec(bool &Quad); |
| - /// Applies prototype modifiers to the type. |
| - void applyModifiers(StringRef Mods); |
| + /// Applies a prototype modifiers to the type. |
| + void applyModifier(char Mod); |
| }; |
| |
| //===----------------------------------------------------------------------===// |
| @@ -301,8 +299,8 @@ class Intrinsic { |
| |
| /// The Record this intrinsic was created from. |
| Record *R; |
| - /// The unmangled name. |
| - std::string Name; |
| + /// The unmangled name and prototype. |
| + std::string Name, Proto; |
| /// The input and output typespecs. InTS == OutTS except when |
| /// CartesianProductOfTypes is 1 - this is the case for vreinterpret. |
| TypeSpec OutTS, InTS; |
| @@ -325,8 +323,6 @@ class Intrinsic { |
| |
| /// The types of return value [0] and parameters [1..]. |
| std::vector<Type> Types; |
| - /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls. |
| - int PolymorphicKeyType; |
| /// The local variables defined. |
| std::map<std::string, Variable> Variables; |
| /// NeededEarly - set if any other intrinsic depends on this intrinsic. |
| @@ -362,39 +358,34 @@ public: |
| Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, |
| TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter, |
| StringRef Guard, bool IsUnavailable, bool BigEndianSafe) |
| - : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body), |
| - Guard(Guard.str()), IsUnavailable(IsUnavailable), |
| - BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false), |
| - UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."), |
| - Emitter(Emitter) { |
| + : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS), |
| + CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable), |
| + BigEndianSafe(BigEndianSafe), NeededEarly(false), UseMacro(false), |
| + BaseType(OutTS, 'd'), InBaseType(InTS, 'd'), Emitter(Emitter) { |
| + // If this builtin takes an immediate argument, we need to #define it rather |
| + // than use a standard declaration, so that SemaChecking can range check |
| + // the immediate passed by the user. |
| + if (Proto.find('i') != std::string::npos) |
| + UseMacro = true; |
| + |
| + // Pointer arguments need to use macros to avoid hiding aligned attributes |
| + // from the pointer type. |
| + if (Proto.find('p') != std::string::npos || |
| + Proto.find('c') != std::string::npos) |
| + UseMacro = true; |
| + |
| + // It is not permitted to pass or return an __fp16 by value, so intrinsics |
| + // taking a scalar float16_t must be implemented as macros. |
| + if (OutTS.find('h') != std::string::npos && |
| + Proto.find('s') != std::string::npos) |
| + UseMacro = true; |
| + |
| // Modify the TypeSpec per-argument to get a concrete Type, and create |
| // known variables for each. |
| // Types[0] is the return value. |
| - unsigned Pos = 0; |
| - Types.emplace_back(OutTS, getNextModifiers(Proto, Pos)); |
| - StringRef Mods = getNextModifiers(Proto, Pos); |
| - while (!Mods.empty()) { |
| - Types.emplace_back(InTS, Mods); |
| - if (Mods.find("!") != StringRef::npos) |
| - PolymorphicKeyType = Types.size() - 1; |
| - |
| - Mods = getNextModifiers(Proto, Pos); |
| - } |
| - |
| - for (auto Type : Types) { |
| - // If this builtin takes an immediate argument, we need to #define it rather |
| - // than use a standard declaration, so that SemaChecking can range check |
| - // the immediate passed by the user. |
| - |
| - // Pointer arguments need to use macros to avoid hiding aligned attributes |
| - // from the pointer type. |
| - |
| - // It is not permitted to pass or return an __fp16 by value, so intrinsics |
| - // taking a scalar float16_t must be implemented as macros. |
| - if (Type.isImmediate() || Type.isPointer() || |
| - (Type.isScalar() && Type.isHalf())) |
| - UseMacro = true; |
| - } |
| + Types.emplace_back(OutTS, Proto[0]); |
| + for (unsigned I = 1; I < Proto.size(); ++I) |
| + Types.emplace_back(InTS, Proto[I]); |
| } |
| |
| /// Get the Record that this intrinsic is based off. |
| @@ -410,24 +401,23 @@ public: |
| |
| /// Return true if the intrinsic takes an immediate operand. |
| bool hasImmediate() const { |
| - return std::any_of(Types.begin(), Types.end(), |
| - [](const Type &T) { return T.isImmediate(); }); |
| + return Proto.find('i') != std::string::npos; |
| } |
| |
| /// Return the parameter index of the immediate operand. |
| unsigned getImmediateIdx() const { |
| - for (unsigned Idx = 0; Idx < Types.size(); ++Idx) |
| - if (Types[Idx].isImmediate()) |
| - return Idx - 1; |
| - llvm_unreachable("Intrinsic has no immediate"); |
| + assert(hasImmediate()); |
| + unsigned Idx = Proto.find('i'); |
| + assert(Idx > 0 && "Can't return an immediate!"); |
| + return Idx - 1; |
| } |
| |
| - |
| - unsigned getNumParams() const { return Types.size() - 1; } |
| + unsigned getNumParams() const { return Proto.size() - 1; } |
| Type getReturnType() const { return Types[0]; } |
| Type getParamType(unsigned I) const { return Types[I + 1]; } |
| Type getBaseType() const { return BaseType; } |
| - Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; } |
| + /// Return the raw prototype string. |
| + std::string getProto() const { return Proto; } |
| |
| /// Return true if the prototype has a scalar argument. |
| bool protoHasScalar() const; |
| @@ -481,8 +471,6 @@ public: |
| void indexBody(); |
| |
| private: |
| - StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const; |
| - |
| std::string mangleName(std::string Name, ClassKind CK) const; |
| |
| void initVariables(); |
| @@ -626,14 +614,10 @@ std::string Type::builtin_str() const { |
| if (isVoid()) |
| return "v"; |
| |
| - if (isPointer()) { |
| + if (Pointer) |
| // All pointers are void pointers. |
| - S = "v"; |
| - if (isConstPointer()) |
| - S += "C"; |
| - S += "*"; |
| - return S; |
| - } else if (isInteger()) |
| + S += "v"; |
| + else if (isInteger()) |
| switch (ElementBitwidth) { |
| case 8: S += "c"; break; |
| case 16: S += "s"; break; |
| @@ -650,11 +634,10 @@ std::string Type::builtin_str() const { |
| default: llvm_unreachable("Unhandled case!"); |
| } |
| |
| - // FIXME: NECESSARY??????????????????????????????????????????????????????????????????????? |
| if (isChar() && !isPointer() && isSigned()) |
| // Make chars explicitly signed. |
| S = "S" + S; |
| - else if (isInteger() && !isSigned()) |
| + else if (!isPointer() && isInteger() && !isSigned()) |
| S = "U" + S; |
| |
| // Constant indices are "int", but have the "constant expression" modifier. |
| @@ -663,8 +646,11 @@ std::string Type::builtin_str() const { |
| S = "I" + S; |
| } |
| |
| - if (isScalar()) |
| + if (isScalar()) { |
| + if (Constant) S += "C"; |
| + if (Pointer) S += "*"; |
| return S; |
| + } |
| |
| std::string Ret; |
| for (unsigned I = 0; I < NumVectors; ++I) |
| @@ -826,77 +812,202 @@ void Type::applyTypespec(bool &Quad) { |
| Bitwidth = Quad ? 128 : 64; |
| } |
| |
| -void Type::applyModifiers(StringRef Mods) { |
| +void Type::applyModifier(char Mod) { |
| bool AppliedQuad = false; |
| applyTypespec(AppliedQuad); |
| |
| - for (char Mod : Mods) { |
| - switch (Mod) { |
| - case '.': |
| - break; |
| - case 'v': |
| - Kind = Void; |
| - break; |
| - case 'S': |
| - Kind = SInt; |
| - break; |
| - case 'U': |
| + switch (Mod) { |
| + case 'v': |
| + Kind = Void; |
| + break; |
| + case 't': |
| + if (isPoly()) |
| Kind = UInt; |
| - break; |
| - case 'F': |
| - Kind = Float; |
| - break; |
| - case 'P': |
| - Kind = Poly; |
| - break; |
| - case '>': |
| - assert(ElementBitwidth < 128); |
| - ElementBitwidth *= 2; |
| - break; |
| - case '<': |
| - assert(ElementBitwidth > 8); |
| - ElementBitwidth /= 2; |
| - break; |
| - case '1': |
| - NumVectors = 0; |
| - break; |
| - case '2': |
| - NumVectors = 2; |
| - break; |
| - case '3': |
| - NumVectors = 3; |
| - break; |
| - case '4': |
| - NumVectors = 4; |
| - break; |
| - case '*': |
| - Pointer = true; |
| - break; |
| - case 'c': |
| - Constant = true; |
| - break; |
| - case 'Q': |
| - Bitwidth = 128; |
| - break; |
| - case 'q': |
| - Bitwidth = 64; |
| - break; |
| - case 'I': |
| - Kind = SInt; |
| - ElementBitwidth = Bitwidth = 32; |
| - NumVectors = 0; |
| - Immediate = true; |
| - break; |
| - case 'p': |
| - if (isPoly()) |
| - Kind = UInt; |
| - break; |
| - case '!': |
| - // Key type, handled elsewhere. |
| - break; |
| - default: |
| - llvm_unreachable("Unhandled character!"); |
| - } |
| + break; |
| + case 'b': |
| + Kind = UInt; |
| + NumVectors = 0; |
| + Bitwidth = ElementBitwidth; |
| + break; |
| + case '$': |
| + Kind = SInt; |
| + NumVectors = 0; |
| + Bitwidth = ElementBitwidth; |
| + break; |
| + case 'u': |
| + Kind = UInt; |
| + break; |
| + case 'x': |
| + assert(!isPoly() && "'u' can't be used with poly types!"); |
| + Kind = SInt; |
| + break; |
| + case 'o': |
| + Bitwidth = ElementBitwidth = 64; |
| + NumVectors = 0; |
| + Kind = Float; |
| + break; |
| + case 'y': |
| + Bitwidth = ElementBitwidth = 32; |
| + NumVectors = 0; |
| + Kind = Float; |
| + break; |
| + case 'Y': |
| + Bitwidth = ElementBitwidth = 16; |
| + NumVectors = 0; |
| + Kind = Float; |
| + break; |
| + case 'I': |
| + Bitwidth = ElementBitwidth = 32; |
| + NumVectors = 0; |
| + Kind = SInt; |
| + break; |
| + case 'L': |
| + Bitwidth = ElementBitwidth = 64; |
| + NumVectors = 0; |
| + Kind = SInt; |
| + break; |
| + case 'U': |
| + Bitwidth = ElementBitwidth = 32; |
| + NumVectors = 0; |
| + Kind = UInt; |
| + break; |
| + case 'O': |
| + Bitwidth = ElementBitwidth = 64; |
| + NumVectors = 0; |
| + Kind = UInt; |
| + break; |
| + case 'f': |
| + Kind = Float; |
| + ElementBitwidth = 32; |
| + break; |
| + case 'F': |
| + Kind = Float; |
| + ElementBitwidth = 64; |
| + break; |
| + case 'H': |
| + Kind = Float; |
| + ElementBitwidth = 16; |
| + break; |
| + case '0': |
| + Kind = Float; |
| + if (AppliedQuad) |
| + Bitwidth /= 2; |
| + ElementBitwidth = 16; |
| + break; |
| + case '1': |
| + Kind = Float; |
| + if (!AppliedQuad) |
| + Bitwidth *= 2; |
| + ElementBitwidth = 16; |
| + break; |
| + case 'g': |
| + if (AppliedQuad) |
| + Bitwidth /= 2; |
| + break; |
| + case 'j': |
| + if (!AppliedQuad) |
| + Bitwidth *= 2; |
| + break; |
| + case 'w': |
| + ElementBitwidth *= 2; |
| + Bitwidth *= 2; |
| + break; |
| + case 'n': |
| + ElementBitwidth *= 2; |
| + break; |
| + case 'i': |
| + Kind = SInt; |
| + ElementBitwidth = Bitwidth = 32; |
| + NumVectors = 0; |
| + Immediate = true; |
| + break; |
| + case 'l': |
| + Kind = UInt; |
| + ElementBitwidth = Bitwidth = 64; |
| + NumVectors = 0; |
| + Immediate = true; |
| + break; |
| + case 'z': |
| + ElementBitwidth /= 2; |
| + Bitwidth = ElementBitwidth; |
| + NumVectors = 0; |
| + break; |
| + case 'r': |
| + ElementBitwidth *= 2; |
| + Bitwidth = ElementBitwidth; |
| + NumVectors = 0; |
| + break; |
| + case 's': |
| + Bitwidth = ElementBitwidth; |
| + NumVectors = 0; |
| + break; |
| + case 'k': |
| + Bitwidth *= 2; |
| + break; |
| + case 'c': |
| + Constant = true; |
| + LLVM_FALLTHROUGH; |
| + case 'p': |
| + Pointer = true; |
| + Bitwidth = ElementBitwidth; |
| + NumVectors = 0; |
| + break; |
| + case 'h': |
| + ElementBitwidth /= 2; |
| + break; |
| + case 'q': |
| + ElementBitwidth /= 2; |
| + Bitwidth *= 2; |
| + break; |
| + case 'e': |
| + ElementBitwidth /= 2; |
| + Kind = UInt; |
| + break; |
| + case 'm': |
| + ElementBitwidth /= 2; |
| + Bitwidth /= 2; |
| + break; |
| + case 'd': |
| + break; |
| + case '2': |
| + NumVectors = 2; |
| + break; |
| + case '3': |
| + NumVectors = 3; |
| + break; |
| + case '4': |
| + NumVectors = 4; |
| + break; |
| + case 'B': |
| + NumVectors = 2; |
| + if (!AppliedQuad) |
| + Bitwidth *= 2; |
| + break; |
| + case 'C': |
| + NumVectors = 3; |
| + if (!AppliedQuad) |
| + Bitwidth *= 2; |
| + break; |
| + case 'D': |
| + NumVectors = 4; |
| + if (!AppliedQuad) |
| + Bitwidth *= 2; |
| + break; |
| + case '7': |
| + if (AppliedQuad) |
| + Bitwidth /= 2; |
| + ElementBitwidth = 8; |
| + break; |
| + case '8': |
| + ElementBitwidth = 8; |
| + break; |
| + case '9': |
| + if (!AppliedQuad) |
| + Bitwidth *= 2; |
| + ElementBitwidth = 8; |
| + break; |
| + default: |
| + llvm_unreachable("Unhandled character!"); |
| } |
| } |
| |
| @@ -904,19 +1015,6 @@ void Type::applyModifiers(StringRef Mods) { |
| // Intrinsic implementation |
| //===----------------------------------------------------------------------===// |
| |
| -StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const { |
| - if (Proto.size() == Pos) |
| - return StringRef(); |
| - else if (Proto[Pos] != '(') |
| - return Proto.substr(Pos++, 1); |
| - |
| - size_t Start = Pos + 1; |
| - size_t End = Proto.find(')', Start); |
| - assert_with_loc(End != StringRef::npos, "unmatched modifier group paren"); |
| - Pos = End + 1; |
| - return Proto.slice(Start, End); |
| -} |
| - |
| std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { |
| char typeCode = '\0'; |
| bool printNumber = true; |
| @@ -955,13 +1053,17 @@ std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { |
| return S; |
| } |
| |
| +static bool isFloatingPointProtoModifier(char Mod) { |
| + return Mod == 'F' || Mod == 'f' || Mod == 'H' || Mod == 'Y' || Mod == 'I'; |
| +} |
| + |
| std::string Intrinsic::getBuiltinTypeStr() { |
| ClassKind LocalCK = getClassKind(true); |
| std::string S; |
| |
| Type RetT = getReturnType(); |
| if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && |
| - !RetT.isFloating()) |
| + !RetT.isFloating() && !RetT.isVoid()) |
| RetT.makeInteger(RetT.getElementSizeInBits(), false); |
| |
| // Since the return value must be one type, return a vector type of the |
| @@ -976,7 +1078,7 @@ std::string Intrinsic::getBuiltinTypeStr() { |
| if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned()) |
| RetT.makeSigned(); |
| |
| - if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar()) |
| + if (LocalCK == ClassB && !RetT.isVoid() && !RetT.isScalar()) |
| // Cast to vector of 8-bit elements. |
| RetT.makeInteger(8, true); |
| |
| @@ -1092,7 +1194,7 @@ void Intrinsic::initVariables() { |
| |
| // Modify the TypeSpec per-argument to get a concrete Type, and create |
| // known variables for each. |
| - for (unsigned I = 1; I < Types.size(); ++I) { |
| + for (unsigned I = 1; I < Proto.size(); ++I) { |
| char NameC = '0' + (I - 1); |
| std::string Name = "p"; |
| Name.push_back(NameC); |
| @@ -1213,7 +1315,7 @@ void Intrinsic::emitShadowedArgs() { |
| for (unsigned I = 0; I < getNumParams(); ++I) { |
| // Do not create a temporary for an immediate argument. |
| // That would defeat the whole point of using a macro! |
| - if (getParamType(I).isImmediate()) |
| + if (hasImmediate() && Proto[I+1] == 'i') |
| continue; |
| // Do not create a temporary for pointer arguments. The input |
| // pointer may have an alignment hint. |
| @@ -1237,9 +1339,13 @@ void Intrinsic::emitShadowedArgs() { |
| } |
| |
| bool Intrinsic::protoHasScalar() const { |
| - return std::any_of(Types.begin(), Types.end(), [](const Type &T) { |
| - return T.isScalar() && !T.isImmediate(); |
| - }); |
| + return (Proto.find('s') != std::string::npos || |
| + Proto.find('z') != std::string::npos || |
| + Proto.find('r') != std::string::npos || |
| + Proto.find('b') != std::string::npos || |
| + Proto.find('$') != std::string::npos || |
| + Proto.find('y') != std::string::npos || |
| + Proto.find('o') != std::string::npos); |
| } |
| |
| void Intrinsic::emitBodyAsBuiltinCall() { |
| @@ -1302,7 +1408,13 @@ void Intrinsic::emitBodyAsBuiltinCall() { |
| |
| // Extra constant integer to hold type class enum for this function, e.g. s8 |
| if (getClassKind(true) == ClassB) { |
| - S += utostr(getPolymorphicKeyType().getNeonEnum()); |
| + Type ThisTy = getReturnType(); |
| + if (Proto[0] == 'v' || isFloatingPointProtoModifier(Proto[0])) |
| + ThisTy = getParamType(0); |
| + if (ThisTy.isPointer()) |
| + ThisTy = getParamType(1); |
| + |
| + S += utostr(ThisTy.getNeonEnum()); |
| } else { |
| // Remove extraneous ", ". |
| S.pop_back(); |
| @@ -1907,9 +2019,9 @@ void NeonEmitter::createIntrinsic(Record *R, |
| std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs; |
| for (auto TS : TypeSpecs) { |
| if (CartesianProductOfTypes) { |
| - Type DefaultT(TS, "."); |
| + Type DefaultT(TS, 'd'); |
| for (auto SrcTS : TypeSpecs) { |
| - Type DefaultSrcT(SrcTS, "."); |
| + Type DefaultSrcT(SrcTS, 'd'); |
| if (TS == SrcTS || |
| DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits()) |
| continue; |
| @@ -1989,19 +2101,31 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, |
| continue; |
| |
| uint64_t Mask = 0ULL; |
| - Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum(); |
| + Type Ty = Def->getReturnType(); |
| + if (Def->getProto()[0] == 'v' || |
| + isFloatingPointProtoModifier(Def->getProto()[0])) |
| + Ty = Def->getParamType(0); |
| + if (Ty.isPointer()) |
| + Ty = Def->getParamType(1); |
| + |
| + Mask |= 1ULL << Ty.getNeonEnum(); |
| |
| // Check if the function has a pointer or const pointer argument. |
| + std::string Proto = Def->getProto(); |
| int PtrArgNum = -1; |
| bool HasConstPtr = false; |
| for (unsigned I = 0; I < Def->getNumParams(); ++I) { |
| - const auto &Type = Def->getParamType(I); |
| - if (Type.isPointer()) { |
| + char ArgType = Proto[I + 1]; |
| + if (ArgType == 'c') { |
| + HasConstPtr = true; |
| PtrArgNum = I; |
| - HasConstPtr = Type.isConstPointer(); |
| + break; |
| + } |
| + if (ArgType == 'p') { |
| + PtrArgNum = I; |
| + break; |
| } |
| } |
| - |
| // For sret builtins, adjust the pointer argument index. |
| if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1) |
| PtrArgNum += 1; |
| @@ -2225,7 +2349,7 @@ void NeonEmitter::run(raw_ostream &OS) { |
| bool InIfdef = false; |
| for (auto &TS : TDTypeVec) { |
| bool IsA64 = false; |
| - Type T(TS, "."); |
| + Type T(TS, 'd'); |
| if (T.isDouble() || (T.isPoly() && T.getElementSizeInBits() == 64)) |
| IsA64 = true; |
| |
| @@ -2258,7 +2382,7 @@ void NeonEmitter::run(raw_ostream &OS) { |
| for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { |
| for (auto &TS : TDTypeVec) { |
| bool IsA64 = false; |
| - Type T(TS, "."); |
| + Type T(TS, 'd'); |
| if (T.isDouble() || (T.isPoly() && T.getElementSizeInBits() == 64)) |
| IsA64 = true; |
| |
| @@ -2271,8 +2395,8 @@ void NeonEmitter::run(raw_ostream &OS) { |
| InIfdef = true; |
| } |
| |
| - const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0}; |
| - Type VT(TS, Mods); |
| + char M = '2' + (NumMembers - 2); |
| + Type VT(TS, M); |
| OS << "typedef struct " << VT.str() << " {\n"; |
| OS << " " << T.str() << " val"; |
| OS << "[" << NumMembers << "]"; |
| diff --git a/clang/utils/convert_arm_neon.py b/clang/utils/convert_arm_neon.py |
| deleted file mode 100644 |
| index c4b36452945..00000000000 |
| --- a/clang/utils/convert_arm_neon.py |
| +++ /dev/null |
| @@ -1,172 +0,0 @@ |
| -#!/usr/bin/env python3 |
| - |
| -# This script was committed on 20/11/2019 and it would probably make sense to remove |
| -# it after the next release branches. |
| - |
| -# This script is pipe based and converts an arm_neon.td (or arm_fp16.td) file |
| -# using the old single-char type modifiers to an equivalent new-style form where |
| -# each modifier is orthogonal and they can be composed. |
| -# |
| -# It was used to directly generate the .td files on master, so if you have any |
| -# local additions I would suggest implementing any modifiers here, and running |
| -# it over your entire pre-merge .td files rather than trying to resolve any |
| -# conflicts manually. |
| - |
| -import re, sys |
| -MOD_MAP = { |
| - 'v': 'v', |
| - 'x': 'S', |
| - 'u': 'U', |
| - 'd': '.', |
| - 'g': 'q', |
| - 'j': 'Q', |
| - 'w': '>Q', |
| - 'n': '>', |
| - 'h': '<', |
| - 'q': '<Q', |
| - 'e': '<U', |
| - 'm': '<q', |
| - 'i': 'I', |
| - 'l': 'IU>', |
| - 's': '1', |
| - 'z': '1<', |
| - 'r': '1>', |
| - 'b': '1U', |
| - '$': '1S', |
| - 'k': 'Q', |
| - '2': '2', |
| - '3': '3', |
| - '4': '4', |
| - 'B': '2Q', |
| - 'C': '3Q', |
| - 'D': '4Q', |
| - 'p': '*', |
| - 'c': 'c*', |
| - '7': '<<q', |
| - '8': '<<', |
| - '9': '<<Q', |
| - 't': 'p' |
| - } |
| - |
| - |
| -def typespec_elt_size(typespec): |
| - if 'c' in typespec: |
| - return 8 |
| - elif 's' in typespec or 'h' in typespec: |
| - return 16 |
| - elif 'i' in typespec or 'f' in typespec: |
| - return 32 |
| - elif 'l' in typespec or 'd' in typespec: |
| - return 64 |
| - elif 'k' in typespec: |
| - return 128 |
| - |
| -def get_resize(cur, desired): |
| - res = '' |
| - while cur < desired: |
| - res += '>' |
| - cur *= 2 |
| - while cur > desired: |
| - res += '<' |
| - cur /= 2 |
| - return res |
| - |
| - |
| -def remap_protocol(proto, typespec, name): |
| - key_type = 0 |
| - |
| - # Conversions like to see the integer type so they know signedness. |
| - if 'vcvt' in name and '_f' in name and name != 'vcvt_f32_f64' and name != 'vcvt_f64_f32': |
| - key_type = 1 |
| - default_width = typespec_elt_size(typespec) |
| - inconsistent_width = False |
| - for elt in typespec: |
| - new_width = typespec_elt_size(elt) |
| - if new_width and new_width != default_width: |
| - inconsistent_width = True |
| - |
| - res = '' |
| - for i, c in enumerate(proto): |
| - # void and pointers make for bad discriminators in CGBuiltin.cpp. |
| - if c in 'vcp': |
| - key_type += 1 |
| - |
| - if c in MOD_MAP: |
| - cur_mod = MOD_MAP[c] |
| - elif inconsistent_width: |
| - # Otherwise it's a fixed output width modifier. |
| - sys.stderr.write(f'warning: {name} uses fixed output size but has inconsistent input widths: {proto} {typespec}\n') |
| - |
| - if c == 'Y': |
| - # y: scalar of half float |
| - resize = get_resize(default_width, 16) |
| - cur_mod = f'1F{resize}' |
| - elif c == 'y': |
| - # y: scalar of float |
| - resize = get_resize(default_width, 32) |
| - cur_mod = f'1F{resize}' |
| - elif c == 'o': |
| - # o: scalar of double |
| - resize = get_resize(default_width, 64) |
| - cur_mod = f'1F{resize}' |
| - elif c == 'I': |
| - # I: scalar of 32-bit signed |
| - resize = get_resize(default_width, 32) |
| - cur_mod = f'1S{resize}' |
| - elif c == 'L': |
| - # L: scalar of 64-bit signed |
| - resize = get_resize(default_width, 64) |
| - cur_mod = f'1S{resize}' |
| - elif c == 'U': |
| - # I: scalar of 32-bit unsigned |
| - resize = get_resize(default_width, 32) |
| - cur_mod = f'1U{resize}' |
| - elif c == 'O': |
| - # O: scalar of 64-bit unsigned |
| - resize = get_resize(default_width, 64) |
| - cur_mod = f'1U{resize}' |
| - elif c == 'f': |
| - # f: float (int args) |
| - resize = get_resize(default_width, 32) |
| - cur_mod = f'F{resize}' |
| - elif c == 'F': |
| - # F: double (int args) |
| - resize = get_resize(default_width, 64) |
| - cur_mod = f'F{resize}' |
| - elif c == 'H': |
| - # H: half (int args) |
| - resize = get_resize(default_width, 16) |
| - cur_mod = f'F{resize}' |
| - elif c == '0': |
| - # 0: half (int args), ignore 'Q' size modifier. |
| - resize = get_resize(default_width, 16) |
| - cur_mod = f'Fq{resize}' |
| - elif c == '1': |
| - # 1: half (int args), force 'Q' size modifier. |
| - resize = get_resize(default_width, 16) |
| - cur_mod = f'FQ{resize}' |
| - |
| - if len(cur_mod) == 0: |
| - raise Exception(f'WTF: {c} in {name}') |
| - |
| - if key_type != 0 and key_type == i: |
| - cur_mod += '!' |
| - |
| - if len(cur_mod) == 1: |
| - res += cur_mod |
| - else: |
| - res += '(' + cur_mod + ')' |
| - |
| - return res |
| - |
| -def replace_insts(m): |
| - start, end = m.span('proto') |
| - start -= m.start() |
| - end -= m.start() |
| - new_proto = remap_protocol(m['proto'], m['kinds'], m['name']) |
| - return m.group()[:start] + new_proto + m.group()[end:] |
| - |
| -INST = re.compile(r'Inst<"(?P<name>.*?)",\s*"(?P<proto>.*?)",\s*"(?P<kinds>.*?)"') |
| - |
| -new_td = INST.sub(replace_insts, sys.stdin.read()) |
| -sys.stdout.write(new_td) |