commit 127091bfd5edf10495fee4724fd21c666e5d79c1
Author: Artem Belevich <tra@google.com>
Date: Thu Jan 14 16:05:33 2021 -0800
[CUDA] Normalize handling of defaulted dtor.
Defaulted destructors were treated inconsistently compared to other
compiler-generated functions.
When Sema::IdentifyCUDATarget() was called on a just-created dtor that did not
yet have the implicit __host__ __device__ attributes applied, it would treat
the dtor as a host function. That happened to (sometimes) hide the error when
the dtor referred to host-only functions.
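
For illustration, a minimal sketch (not taken from this patch; the names
HostOnly and Wrapper are hypothetical) of the kind of code affected by that
misidentification:

  struct HostOnly {
    __host__ ~HostOnly();      // host-only dtor
  };
  struct Wrapper {
    HostOnly m;
    ~Wrapper() = default;      // defaulted dtor implicitly calls HostOnly::~HostOnly
  };

With the dtor consistently identified as __host__ __device__, the host-only
call inside it is subject to the usual wrong-side handling: allowed at
compile time, with a deferred diagnostic if the dtor is ever emitted for the
device, instead of sometimes slipping through as a host-to-host call.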
Even when we had identified a defaulted dtor as an HD function, we still
treated it inconsistently during the selection of usual deallocation
functions: we did not allow it to refer to wrong-side functions, even though
that is allowed for other HD functions.
This change brings handling of defaulted dtors in line with other HD functions.
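
The pattern the new SemaCUDA test below exercises, sketched here for context
(same shape as the added test, no additional functionality implied):

  struct H {
    virtual ~H() = default;                 // defaulted dtor, now treated as HD
    static void operator delete(void *) {}  // host-only usual deallocator
  };
  H h;

The defaulted virtual dtor may now reference the host-only operator delete as
a wrong-side call, which only produces a deferred diagnostic if the dtor is
ever codegen'd for the device, matching how other HD functions behave.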
Differential Revision: https://reviews.llvm.org/D94732
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 0f06adf38f7a..ee91eb4c5deb 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -123,7 +123,8 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
return CFT_Device;
} else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
return CFT_Host;
- } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
+ } else if ((D->isImplicit() || !D->isUserProvided()) &&
+ !IgnoreImplicitHDAttr) {
// Some implicit declarations (like intrinsic functions) are not marked.
// Set the most lenient target on them for maximal flexibility.
return CFT_HostDevice;
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 1ee52107c3da..d91db60f17a0 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -1527,9 +1527,24 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo,
bool Sema::isUsualDeallocationFunction(const CXXMethodDecl *Method) {
// [CUDA] Ignore this function, if we can't call it.
const FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
- if (getLangOpts().CUDA &&
- IdentifyCUDAPreference(Caller, Method) <= CFP_WrongSide)
- return false;
+ if (getLangOpts().CUDA) {
+ auto CallPreference = IdentifyCUDAPreference(Caller, Method);
+ // If it's not callable at all, it's not the right function.
+ if (CallPreference < CFP_WrongSide)
+ return false;
+ if (CallPreference == CFP_WrongSide) {
+ // Maybe. We have to check if there are better alternatives.
+ DeclContext::lookup_result R =
+ Method->getDeclContext()->lookup(Method->getDeclName());
+ for (const auto *D : R) {
+ if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
+ if (IdentifyCUDAPreference(Caller, FD) > CFP_WrongSide)
+ return false;
+ }
+ }
+ // We've found no better variants.
+ }
+ }
SmallVector<const FunctionDecl*, 4> PreventedBy;
bool Result = Method->isUsualDeallocationFunction(PreventedBy);
diff --git a/clang/test/CodeGenCUDA/usual-deallocators.cu b/clang/test/CodeGenCUDA/usual-deallocators.cu
index 7e7752497f34..6f4cc267a23f 100644
--- a/clang/test/CodeGenCUDA/usual-deallocators.cu
+++ b/clang/test/CodeGenCUDA/usual-deallocators.cu
@@ -12,6 +12,19 @@ extern "C" __host__ void host_fn();
extern "C" __device__ void dev_fn();
extern "C" __host__ __device__ void hd_fn();
+// Destructors are handled a bit differently, compared to regular functions.
+// Make sure we do trigger kernel generation on the GPU side even if it's only
+// referenced by the destructor.
+template<typename T> __global__ void f(T) {}
+template<typename T> struct A {
+ ~A() { f<<<1, 1>>>(T()); }
+};
+
+// HOST-LABEL: @a
+A<int> a;
+// HOST-LABEL: define linkonce_odr void @_ZN1AIiED1Ev
+// search further down for the device-side checks for @_Z1fIiEvT_
+
struct H1D1 {
__host__ void operator delete(void *) { host_fn(); };
__device__ void operator delete(void *) { dev_fn(); };
@@ -95,6 +108,9 @@ __host__ __device__ void tests_hd(void *t) {
test_hd<H1H2D1D2>(t);
}
+// Make sure that we've generated the kernel used by A::~A.
+// DEVICE-LABEL: define dso_local void @_Z1fIiEvT_
+
// Make sure we've picked deallocator for the correct side of compilation.
// COMMON-LABEL: define linkonce_odr void @_ZN4H1D1dlEPv(i8* %0)
@@ -131,3 +147,5 @@ __host__ __device__ void tests_hd(void *t) {
// COMMON-LABEL: define linkonce_odr void @_ZN8H1H2D1D2dlEPv(i8* %0)
// DEVICE: call void @dev_fn()
// HOST: call void @host_fn()
+
+// DEVICE: !0 = !{void (i32)* @_Z1fIiEvT_, !"kernel", i32 1}
diff --git a/clang/test/SemaCUDA/usual-deallocators.cu b/clang/test/SemaCUDA/usual-deallocators.cu
index a0238649c6dc..3670a3bf32c3 100644
--- a/clang/test/SemaCUDA/usual-deallocators.cu
+++ b/clang/test/SemaCUDA/usual-deallocators.cu
@@ -93,3 +93,12 @@ __host__ __device__ void tests_hd(void *t) {
test_hd<H1H2D2>(t);
test_hd<H1H2D1D2>(t);
}
+
+// This should produce no errors. The defaulted destructor should be treated as
+// HD, which allows referencing the host-only `operator delete` with a deferred
+// diagnostic that would fire if we ever attempt to codegen it on the device.
+struct H {
+ virtual ~H() = default;
+ static void operator delete(void *) {}
+};
+H h;