| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions "make_big|kernel|local_test" --version 5 |
| // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -disable-llvm-passes -x hip -std=c++17 %s -o - | FileCheck %s |
| // REQUIRES: amdgpu-registered-target |
| |
| // Verify that when a function returning an aggregate via sret is called with a |
| // destination in a different address space (e.g. global pointer from kernel |
| // arg), the compiler materialises a temporary in the alloca AS and copies back, |
| // rather than emitting an invalid addrspacecast of the destination pointer. |
| |
| typedef __SIZE_TYPE__ size_t; |
| // Device-side placement new: performs no allocation and simply returns the |
| // storage pointer supplied by the caller, so `new (ptr) T(...)` constructs |
| // the object in place at `ptr`. |
| __attribute__((device)) void *operator new(size_t, void *p) noexcept { |
|   return p; |
| } |
| |
| // Aggregate used as the by-value return type in this test. It holds 32 ints |
| // (the expected IR refers to it as 128 bytes with 4-byte alignment) and has a |
| // user-provided device constructor. |
| struct Big { |
|   int v[32]; |
|   // Fills the array so that v[k] == x + k for every k in [0, 32). |
|   __attribute__((device)) Big(int x) { |
|     int idx = 0; |
|     while (idx < 32) { |
|       v[idx] = x + idx; |
|       ++idx; |
|     } |
|   } |
| }; |
| |
| // make_big() returns a Big by value. The expected IR below passes the result |
| // indirectly through an sret pointer in addrspace(5), casts that pointer to a |
| // generic pointer, and runs Big's constructor on it with the argument 7. |
| // CHECK-LABEL: define dso_local void @_Z8make_bigv( |
| // CHECK-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_BIG:%.*]]) align 4 [[AGG_RESULT:%.*]]) #[[ATTR0:[0-9]+]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr |
| // CHECK-NEXT: call void @_ZN3BigC1Ei(ptr noundef nonnull align 4 dereferenceable(128) [[AGG_RESULT_ASCAST]], i32 noundef 7) #[[ATTR3:[0-9]+]] |
| // CHECK-NEXT: ret void |
| // |
| __attribute__((device)) Big make_big() { return Big(7); } |
| |
| // Main scenario: the kernel placement-news a Big into `out`, which arrives as |
| // an addrspace(1) kernel argument, using the by-value result of make_big(). |
| // The expected IR below calls make_big() with an addrspace(5) temporary as |
| // the sret slot and then memcpy's 128 bytes from that temporary to the loaded |
| // `out` pointer; `out` itself is never cast to addrspace(5). |
| // CHECK-LABEL: define dso_local amdgpu_kernel void @_Z6kernelP3Big( |
| // CHECK-SAME: ptr addrspace(1) noundef [[OUT_COERCE:%.*]]) #[[ATTR1:[0-9]+]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[OUT:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4, addrspace(5) |
| // CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr |
| // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr |
| // CHECK-NEXT: store ptr addrspace(1) [[OUT_COERCE]], ptr [[OUT_ASCAST]], align 8 |
| // CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8 |
| // CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 |
| // CHECK-NEXT: call void @_Z8make_bigv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_BIG]]) align 4 [[TMP]]) #[[ATTR3]] |
| // CHECK-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP0]], ptr addrspace(5) align 4 [[TMP]], i64 128, i1 false) |
| // CHECK-NEXT: ret void |
| // |
| __attribute__((global)) void kernel(Big *out) { |
| new (out) Big(make_big()); |
| } |
| |
| // Counterpart case: if the destination is ultimately backed by an alloca in |
| // the alloca address space (even when reached through a chain of address-space |
| // casts), it should be passed directly as the sret argument, with no extra |
| // temporary and no copy. Below, `local` is cast to a generic pointer and back |
| // to addrspace(5), and that pointer is handed straight to make_big(). |
| // CHECK-LABEL: define dso_local void @_Z10local_testv( |
| // CHECK-SAME: ) #[[ATTR0]] { |
| // CHECK-NEXT: [[ENTRY:.*:]] |
| // CHECK-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4, addrspace(5) |
| // CHECK-NEXT: [[LOCAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL]] to ptr |
| // CHECK-NEXT: [[LOCAL_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_ASCAST]] to ptr addrspace(5) |
| // CHECK-NEXT: call void @_Z8make_bigv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_BIG]]) align 4 [[LOCAL_ASCAST_ASCAST]]) #[[ATTR3]] |
| // CHECK-NEXT: ret void |
| // |
| __attribute__((device)) void local_test() { |
| Big local = make_big(); |
| } |