blob: 48a401baf9a784447b065d4d6a98af8af47a3107 [file] [edit]
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions "make_big|kernel|local_test" --version 5
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -disable-llvm-passes -x hip -std=c++17 %s -o - | FileCheck %s
// REQUIRES: amdgpu-registered-target
// Verify that when a function returning an aggregate via sret is called with a
// destination in a different address space (e.g. global pointer from kernel
// arg), the compiler materialises a temporary in the alloca AS and copies back,
// rather than emitting an invalid addrspacecast of the destination pointer.
typedef __SIZE_TYPE__ size_t;
// Placement form of operator new, defined locally in this test: it ignores the
// requested size and hands back the caller-supplied pointer p unchanged.
__attribute__((device)) void *operator new(size_t, void *p) noexcept { return p; }
// Aggregate of 32 ints with a user-provided constructor; the IR checks in
// this file show it being returned indirectly through an sret pointer.
struct Big {
  int v[32];

  // Fills the array with consecutive values starting at x, so v[k] == x + k.
  __attribute__((device)) Big(int x) {
    int idx = 0;
    while (idx < 32) {
      v[idx] = x + idx;
      ++idx;
    }
  }
};
// CHECK-LABEL: define dso_local void @_Z8make_bigv(
// CHECK-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_BIG:%.*]]) align 4 [[AGG_RESULT:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
// CHECK-NEXT: call void @_ZN3BigC1Ei(ptr noundef nonnull align 4 dereferenceable(128) [[AGG_RESULT_ASCAST]], i32 noundef 7) #[[ATTR3:[0-9]+]]
// CHECK-NEXT: ret void
//
// Device function returning Big(7) by value. The expected IR for this
// function has the constructor called on the (addrspace-cast) sret argument,
// with no intermediate local object.
__attribute__((device)) Big make_big() { return Big(7); }
// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z6kernelP3Big(
// CHECK-SAME: ptr addrspace(1) noundef [[OUT_COERCE:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[OUT:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4, addrspace(5)
// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[OUT_COERCE]], ptr [[OUT_ASCAST]], align 8
// CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8
// CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @_Z8make_bigv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_BIG]]) align 4 [[TMP]]) #[[ATTR3]]
// CHECK-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP0]], ptr addrspace(5) align 4 [[TMP]], i64 128, i1 false)
// CHECK-NEXT: ret void
//
// Kernel entry point: placement-constructs a Big in the storage pointed to by
// `out`, initialised from the result of make_big(). Because `out` comes from a
// kernel argument rather than a local alloca, the expected IR passes an
// addrspace(5) temporary as the sret slot and then memcpy's it to `out`.
__attribute__((global)) void kernel(Big *out) {
new (out) Big(make_big());
}
// If the destination is ultimately backed by alloca AS (even through cast
// chains), we should pass it directly as sret and avoid an extra temp/copy.
// CHECK-LABEL: define dso_local void @_Z10local_testv(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4, addrspace(5)
// CHECK-NEXT: [[LOCAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL]] to ptr
// CHECK-NEXT: [[LOCAL_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_ASCAST]] to ptr addrspace(5)
// CHECK-NEXT: call void @_Z8make_bigv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_BIG]]) align 4 [[LOCAL_ASCAST_ASCAST]]) #[[ATTR3]]
// CHECK-NEXT: ret void
//
// Initialises a function-local Big from make_big(). The local is backed by an
// addrspace(5) alloca, so the expected IR passes it (cast back to
// addrspace(5)) directly as the sret argument, with no temporary and no memcpy.
__attribute__((device)) void local_test() {
Big local = make_big();
}