commit 127091bfd5edf10495fee4724fd21c666e5d79c1
Author: Artem Belevich <tra@google.com>
Date: Thu Jan 14 16:05:33 2021 -0800
[CUDA] Normalize handling of defaulted dtor.
Defaulted destructors were treated inconsistently compared to other
compiler-generated functions.
When Sema::IdentifyCUDATarget() was called on a just-created dtor that did not
yet have the implicit __host__ __device__ attributes applied, it would treat
the dtor as a host function. That happened to (sometimes) hide the error when
the dtor referred to host-only functions.
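
For illustration, a minimal sketch (not taken from this patch; the names
HostOnly and Wrapper are hypothetical) of the kind of code affected by that
misidentification:

  struct HostOnly {
    __host__ ~HostOnly();      // host-only dtor
  };
  struct Wrapper {
    HostOnly m;
    ~Wrapper() = default;      // defaulted dtor implicitly calls HostOnly::~HostOnly
  };

With the dtor consistently identified as __host__ __device__, the host-only
call inside it is subject to the usual wrong-side handling: allowed at
compile time, with a deferred diagnostic if the dtor is ever emitted for the
device, instead of sometimes slipping through as a host-to-host call.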
Even when we had identified a defaulted dtor as an HD function, we still
treated it inconsistently during the selection of usual deallocation
functions: we did not allow it to refer to wrong-side functions, even though
that is allowed for other HD functions.
This change brings handling of defaulted dtors in line with other HD functions.
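
The pattern the new SemaCUDA test below exercises, sketched here for context
(same shape as the added test, no additional functionality implied):

  struct H {
    virtual ~H() = default;                 // defaulted dtor, now treated as HD
    static void operator delete(void *) {}  // host-only usual deallocator
  };
  H h;

The defaulted virtual dtor may now reference the host-only operator delete as
a wrong-side call, which only produces a deferred diagnostic if the dtor is
ever codegen'd for the device, matching how other HD functions behave.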
Differential Revision: https://reviews.llvm.org/D94732
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 0f06adf38f7a..ee91eb4c5deb 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -123,7 +123,8 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
return CFT_Device;
} else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
return CFT_Host;
- } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
+ } else if ((D->isImplicit() || !D->isUserProvided()) &&
+ !IgnoreImplicitHDAttr) {
// Some implicit declarations (like intrinsic functions) are not marked.
// Set the most lenient target on them for maximal flexibility.
return CFT_HostDevice;
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 1ee52107c3da..d91db60f17a0 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -1527,9 +1527,24 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo,
bool Sema::isUsualDeallocationFunction(const CXXMethodDecl *Method) {
// [CUDA] Ignore this function, if we can't call it.
const FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
- if (getLangOpts().CUDA &&
- IdentifyCUDAPreference(Caller, Method) <= CFP_WrongSide)
- return false;
+ if (getLangOpts().CUDA) {
+ auto CallPreference = IdentifyCUDAPreference(Caller, Method);
+ // If it's not callable at all, it's not the right function.
+ if (CallPreference < CFP_WrongSide)
+ return false;
+ if (CallPreference == CFP_WrongSide) {
+ // Maybe. We have to check if there are better alternatives.
+ DeclContext::lookup_result R =
+ Method->getDeclContext()->lookup(Method->getDeclName());
+ for (const auto *D : R) {
+ if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
+ if (IdentifyCUDAPreference(Caller, FD) > CFP_WrongSide)
+ return false;
+ }
+ }
+ // We've found no better variants.
+ }
+ }
SmallVector<const FunctionDecl*, 4> PreventedBy;
bool Result = Method->isUsualDeallocationFunction(PreventedBy);
diff --git a/clang/test/CodeGenCUDA/usual-deallocators.cu b/clang/test/CodeGenCUDA/usual-deallocators.cu
index 7e7752497f34..6f4cc267a23f 100644
--- a/clang/test/CodeGenCUDA/usual-deallocators.cu
+++ b/clang/test/CodeGenCUDA/usual-deallocators.cu
@@ -12,6 +12,19 @@ extern "C" __host__ void host_fn();
extern "C" __device__ void dev_fn();
extern "C" __host__ __device__ void hd_fn();
+// Destructors are handled a bit differently, compared to regular functions.
+// Make sure we do trigger kernel generation on the GPU side even if it's only
+// referenced by the destructor.
+template<typename T> __global__ void f(T) {}
+template<typename T> struct A {
+ ~A() { f<<<1, 1>>>(T()); }
+};
+
+// HOST-LABEL: @a
+A<int> a;
+// HOST-LABEL: define linkonce_odr void @_ZN1AIiED1Ev
+// search further down for the device-side checks for @_Z1fIiEvT_
+
struct H1D1 {
__host__ void operator delete(void *) { host_fn(); };
__device__ void operator delete(void *) { dev_fn(); };
@@ -95,6 +108,9 @@ __host__ __device__ void tests_hd(void *t) {
test_hd<H1H2D1D2>(t);
}
+// Make sure that we've generated the kernel used by A::~A.
+// DEVICE-LABEL: define dso_local void @_Z1fIiEvT_
+
// Make sure we've picked deallocator for the correct side of compilation.
// COMMON-LABEL: define linkonce_odr void @_ZN4H1D1dlEPv(i8* %0)
@@ -131,3 +147,5 @@ __host__ __device__ void tests_hd(void *t) {
// COMMON-LABEL: define linkonce_odr void @_ZN8H1H2D1D2dlEPv(i8* %0)
// DEVICE: call void @dev_fn()
// HOST: call void @host_fn()
+
+// DEVICE: !0 = !{void (i32)* @_Z1fIiEvT_, !"kernel", i32 1}
diff --git a/clang/test/SemaCUDA/usual-deallocators.cu b/clang/test/SemaCUDA/usual-deallocators.cu
index a0238649c6dc..3670a3bf32c3 100644
--- a/clang/test/SemaCUDA/usual-deallocators.cu
+++ b/clang/test/SemaCUDA/usual-deallocators.cu
@@ -93,3 +93,12 @@ __host__ __device__ void tests_hd(void *t) {
test_hd<H1H2D2>(t);
test_hd<H1H2D1D2>(t);
}
+
+// This should produce no errors. The defaulted destructor should be treated as
+// HD, which allows referencing the host-only `operator delete` with a deferred
+// diagnostic that would fire if we ever attempt to codegen it on the device.
+struct H {
+ virtual ~H() = default;
+ static void operator delete(void *) {}
+};
+H h;