| commit d0ee8b64ecf359737ce550d8f47f465ab6657be7 |
| Author: Teresa Johnson <tejohnson@google.com> |
| Date: Wed Jun 2 16:37:23 2021 -0700 |
| |
| [LTO] Fix -fwhole-program-vtables handling after HIP ThinLTO patch |
| |
| A recent change (D99683) to support ThinLTO for HIP caused a regression |
| when compiling cuda code with -flto=thin -fwhole-program-vtables. |
| Specifically, we now get an error: |
| error: invalid argument '-fwhole-program-vtables' only allowed with '-flto' |
| |
| This error is coming from the device offload cc1 action being set up for |
| the cuda compile, for which -flto=thin doesn't apply and gets dropped. |
| This is a regression, but points to a potential issue that was silently |
| occurring before the patch, details below. |
| |
| Before D99683, the check for fwhole-program-vtables in the driver looked |
| like: |
| |
| if (WholeProgramVTables) { |
| if (!D.isUsingLTO()) |
| D.Diag(diag::err_drv_argument_only_allowed_with) |
| << "-fwhole-program-vtables" |
| << "-flto"; |
| CmdArgs.push_back("-fwhole-program-vtables"); |
| } |
| |
| And D.isUsingLTO() returned true since we have -flto=thin. However, |
| because the cuda cc1 compile is doing device offloading, which didn't |
| support any LTO, there was other code that suppressed -flto* options |
| from being passed to the cc1 invocation. So the cc1 invocation silently |
| had -fwhole-program-vtables without any -flto*. This seems potentially |
| problematic, since if we had any virtual calls we would get type test |
| assume sequences without the corresponding LTO pass that handles them. |
| |
| However, with the patch, which adds support for device offloading LTO |
| option -foffload-lto=thin, the code has changed so that we set a bool |
| IsUsingLTO based on either -flto* or -foffload-lto*, depending on |
| whether this is the device offloading action. For the device offload |
| action in our compile, since we don't have -foffload-lto, IsUsingLTO is |
| false, and the check for LTO with -fwhole-program-vtables now fails. |
| |
| What we should do is only pass through -fwhole-program-vtables to the |
| cc1 invocation that has LTO enabled (either the device offload action |
| with -foffload-lto, or the non-device offload action with -flto), and |
| otherwise drop the -fwhole-program-vtables for the non-LTO action. |
| Then we should error only if we have -fwhole-program-vtables without any |
| -f*lto* options. |
| |
| Differential Revision: https://reviews.llvm.org/D103579 |
| |
| diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp |
| index dea4ade683ef..ee40df35b850 100644 |
| --- a/clang/lib/Driver/ToolChains/Clang.cpp |
| +++ b/clang/lib/Driver/ToolChains/Clang.cpp |
| @@ -6647,11 +6647,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, |
| } |
| |
| if (WholeProgramVTables) { |
| - if (!IsUsingLTO) |
| + // Propagate -fwhole-program-vtables if this is an LTO compile. |
| + if (IsUsingLTO) |
| + CmdArgs.push_back("-fwhole-program-vtables"); |
| + // Check if we passed LTO options but they were suppressed because this is a |
| + // device offloading action, or we passed device offload LTO options which |
| + // were suppressed because this is not the device offload action. |
| + // Otherwise, issue an error. |
| + else if (!D.isUsingLTO(!IsDeviceOffloadAction)) |
| D.Diag(diag::err_drv_argument_only_allowed_with) |
| << "-fwhole-program-vtables" |
| << "-flto"; |
| - CmdArgs.push_back("-fwhole-program-vtables"); |
| } |
| |
| bool DefaultsSplitLTOUnit = |
| diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu |
| index 175e4b877ce9..5b67d7e4d04f 100644 |
| --- a/clang/test/Driver/cuda-options.cu |
| +++ b/clang/test/Driver/cuda-options.cu |
| @@ -183,6 +183,12 @@ |
| // RUN: -c %s 2>&1 \ |
| // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s |
| |
| +// Verify -flto=thin -fwhole-program-vtables handling. This should result in |
| +// both options being passed to the host compilation, with neither passed to |
| +// the device compilation. |
| +// RUN: %clang -### -target x86_64-linux-gnu -c -flto=thin -fwhole-program-vtables %s 2>&1 \ |
| +// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,INCLUDES-DEVICE,NOLINK,THINLTOWPD %s |
| +// THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto' |
| |
| // ARCH-SM20: "-cc1"{{.*}}"-target-cpu" "sm_20" |
| // NOARCH-SM20-NOT: "-cc1"{{.*}}"-target-cpu" "sm_20" |
| @@ -206,8 +212,10 @@ |
| // Match the job that produces PTX assembly. |
| // DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda" |
| // DEVICE-NOSAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" |
| +// THINLTOWPD-NOT: "-flto=thin" |
| // DEVICE-SAME: "-fcuda-is-device" |
| // DEVICE-SM30-SAME: "-target-cpu" "sm_30" |
| +// THINLTOWPD-NOT: "-fwhole-program-vtables" |
| // DEVICE-SAME: "-o" "[[PTXFILE:[^"]*]]" |
| // DEVICE-NOSAVE-SAME: "-x" "cuda" |
| // DEVICE-SAVE-SAME: "-x" "ir" |
| @@ -252,12 +260,14 @@ |
| // Match host-side compilation. |
| // HOST: "-cc1" "-triple" "x86_64-unknown-linux-gnu" |
| // HOST-SAME: "-aux-triple" "nvptx64-nvidia-cuda" |
| +// THINLTOWPD-SAME: "-flto=thin" |
| // HOST-NOT: "-fcuda-is-device" |
| // There is only one GPU binary after combining it with fatbinary! |
| // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary" |
| // INCLUDES-DEVICE-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]" |
| // There is only one GPU binary after combining it with fatbinary. |
| // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary" |
| +// THINLTOWPD-SAME: "-fwhole-program-vtables" |
| // HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]" |
| // HOST-NOSAVE-SAME: "-x" "cuda" |
| // HOST-SAVE-SAME: "-x" "cuda-cpp-output" |
| diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip |
| index ec723053da05..08a821c89a19 100644 |
| --- a/clang/test/Driver/hip-options.hip |
| +++ b/clang/test/Driver/hip-options.hip |
| @@ -60,13 +60,30 @@ |
| // Check -foffload-lto=thin translated correctly. |
| |
| // RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ |
| -// RUN: --cuda-gpu-arch=gfx906 -foffload-lto=thin %s 2>&1 \ |
| -// RUN: | FileCheck -check-prefix=THINLTO %s |
| +// RUN: --cuda-gpu-arch=gfx906 -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \ |
| +// RUN: | FileCheck -check-prefix=HIPTHINLTO %s |
| + |
| +// RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ |
| +// RUN: --cuda-gpu-arch=gfx906 -fgpu-rdc -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \ |
| +// RUN: | FileCheck -check-prefix=HIPTHINLTO %s |
| |
| +// Ensure we don't error about -fwhole-program-vtables for the non-device offload compile. |
| +// HIPTHINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto' |
| +// HIPTHINLTO-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit" |
| +// HIPTHINLTO: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-flto=thin" "-flto-unit" {{.*}} "-fwhole-program-vtables" |
| +// HIPTHINLTO-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit" |
| +// HIPTHINLTO: lld{{.*}}"-plugin-opt=mcpu=gfx906" "-plugin-opt=thinlto" "-plugin-opt=-force-import-all" |
| + |
| +// Check that -flto=thin is handled correctly, particularly with -fwhole-program-vtables. |
| +// |
| // RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ |
| -// RUN: --cuda-gpu-arch=gfx906 -fgpu-rdc -foffload-lto=thin %s 2>&1 \ |
| +// RUN: --cuda-gpu-arch=gfx906 -flto=thin -fwhole-program-vtables %s 2>&1 \ |
| // RUN: | FileCheck -check-prefix=THINLTO %s |
| |
| -// THINLTO-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit" |
| -// THINLTO: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-flto=thin" "-flto-unit" |
| -// THINLTO: lld{{.*}}"-plugin-opt=mcpu=gfx906" "-plugin-opt=thinlto" "-plugin-opt=-force-import-all" |
| +// Ensure we don't error about -fwhole-program-vtables for the device offload compile. We should |
| +// drop -fwhole-program-vtables for the device offload compile and pass it through for the |
| +// non-device offload compile along with -flto=thin. |
| +// THINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto' |
| +// THINLTO-NOT: clang{{.*}}" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fwhole-program-vtables" |
| +// THINLTO: clang{{.*}}" "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto=thin" {{.*}} "-fwhole-program-vtables" |
| +// THINLTO-NOT: clang{{.*}}" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fwhole-program-vtables" |