From 19f13620b3859711707a582eb67b84cf523fbf7d Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Mon, 25 Jul 2022 15:12:10 +0200
Subject: [PATCH] [ARM] Add target feature to force 32-bit atomics

This adds a +atomics-32 target feature, which instructs LLVM to assume
that lock-free 32-bit atomics are available for this target, even
if they usually wouldn't be.
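
As a rough C illustration (hypothetical, not part of this patch), and
assuming the feature reaches the backend (e.g. via -mattr=+atomics-32
as in the test below), the 32-bit atomic accesses here stay inline
instead of becoming __atomic_load_4/__atomic_store_4 libcalls:

  #include <stdatomic.h>
  #include <stdint.h>

  /* Hypothetical example: a 32-bit atomic counter. With +atomics-32,
     the load and store below lower to plain ldr/str plus dmb barriers;
     without it, they become __atomic_load_4/__atomic_store_4 calls. */
  static _Atomic uint32_t counter;

  uint32_t read_counter(void) { return atomic_load(&counter); }
  void reset_counter(void)    { atomic_store(&counter, 0); }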

If only atomic loads/stores are used, then this won't emit libcalls.
If atomic CAS is used, then the user is responsible for providing
any necessary __sync implementations (e.g. by masking interrupts
for single-core privileged use cases).
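
As a sketch of that approach (hypothetical, not part of this patch; it
assumes privileged code on a single-core ARMv6-M device, where masking
interrupts via PRIMASK makes the read-modify-write sequence effectively
atomic), one of the required __sync routines might look like:

  #include <stdint.h>

  uint32_t __sync_val_compare_and_swap_4(volatile uint32_t *p,
                                         uint32_t expected,
                                         uint32_t desired) {
    uint32_t primask;
    /* Save PRIMASK and disable interrupts. */
    __asm__ volatile("mrs %0, PRIMASK\n\tcpsid i"
                     : "=r"(primask) : : "memory");
    uint32_t old = *p;
    if (old == expected)
      *p = desired;
    /* Restore the previous interrupt state. */
    __asm__ volatile("msr PRIMASK, %0" : : "r"(primask) : "memory");
    return old;
  }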

See https://reviews.llvm.org/D120026#3674333 for context on this
change. The tl;dr is that Rust's thumbv6m target has historically
made only atomic load/store available; this is incompatible with
D120026, which switched these operations to libatomic calls.

Differential Revision: https://reviews.llvm.org/D130480
---
 llvm/lib/Target/ARM/ARM.td                 |  19 ++
 llvm/lib/Target/ARM/ARMISelLowering.cpp    |   3 +-
 llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll | 203 +++++++++++++++++++++
 3 files changed, 224 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll

diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index e8970b916a5f..2d4887749d4f 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -546,6 +546,25 @@ def FeatureFixCortexA57AES1742098 : SubtargetFeature<"fix-cortex-a57-aes-1742098
 "FixCortexA57AES1742098", "true",
 "Work around Cortex-A57 Erratum 1742098 / Cortex-A72 Erratum 1655431 (AES)">;

+def FeatureAAPCSFrameChain : SubtargetFeature<"aapcs-frame-chain",
+ "CreateAAPCSFrameChain", "true",
+ "Create an AAPCS compliant frame chain">;
+
+def FeatureAAPCSFrameChainLeaf : SubtargetFeature<"aapcs-frame-chain-leaf",
+ "CreateAAPCSFrameChainLeaf", "true",
+ "Create an AAPCS compliant frame chain "
+ "for leaf functions",
+ [FeatureAAPCSFrameChain]>;
+
+// Assume that lock-free 32-bit atomics are available, even if the target
+// and operating system combination would not usually provide them. The user
+// is responsible for providing any necessary __sync implementations. Code
+// built with this feature is not ABI-compatible with code built without this
+// feature, if atomic variables are exposed across the ABI boundary.
+def FeatureAtomics32 : SubtargetFeature<
+ "atomics-32", "HasForced32BitAtomics", "true",
+ "Assume that lock-free 32-bit atomics are available">;
+
 //===----------------------------------------------------------------------===//
 // ARM architecture class
 //
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index c67ac3e21779..725818735036 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1362,7 +1362,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
 // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
 // encoding; see ARMISD::MEMBARRIER_MCR.)
 setMaxAtomicSizeInBitsSupported(64);
- } else if (Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) {
+ } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
+ Subtarget->hasForced32BitAtomics()) {
 // Cortex-M (besides Cortex-M0) have 32-bit atomics.
 setMaxAtomicSizeInBitsSupported(32);
 } else {
diff --git a/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll b/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
new file mode 100644
index 000000000000..36ed03a8c384
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
@@ -0,0 +1,203 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefixes=CHECK,NO-ATOMIC32
+; RUN: llc -mtriple=thumbv6m-none-eabi -mattr=+atomics-32 < %s | FileCheck %s --check-prefixes=CHECK,ATOMIC32
+
+define i32 @load32(ptr %p) {
+; NO-ATOMIC32-LABEL: load32:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r7, lr}
+; NO-ATOMIC32-NEXT: push {r7, lr}
+; NO-ATOMIC32-NEXT: movs r1, #5
+; NO-ATOMIC32-NEXT: bl __atomic_load_4
+; NO-ATOMIC32-NEXT: pop {r7, pc}
+;
+; ATOMIC32-LABEL: load32:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: ldr r0, [r0]
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: bx lr
+ %v = load atomic i32, ptr %p seq_cst, align 4
+ ret i32 %v
+}
+
+define void @store32(ptr %p) {
+; NO-ATOMIC32-LABEL: store32:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r7, lr}
+; NO-ATOMIC32-NEXT: push {r7, lr}
+; NO-ATOMIC32-NEXT: movs r1, #0
+; NO-ATOMIC32-NEXT: movs r2, #5
+; NO-ATOMIC32-NEXT: bl __atomic_store_4
+; NO-ATOMIC32-NEXT: pop {r7, pc}
+;
+; ATOMIC32-LABEL: store32:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: movs r1, #0
+; ATOMIC32-NEXT: str r1, [r0]
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: bx lr
+ store atomic i32 0, ptr %p seq_cst, align 4
+ ret void
+}
+
+define i32 @rmw32(ptr %p) {
+; NO-ATOMIC32-LABEL: rmw32:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r7, lr}
+; NO-ATOMIC32-NEXT: push {r7, lr}
+; NO-ATOMIC32-NEXT: movs r1, #1
+; NO-ATOMIC32-NEXT: movs r2, #5
+; NO-ATOMIC32-NEXT: bl __atomic_fetch_add_4
+; NO-ATOMIC32-NEXT: pop {r7, pc}
+;
+; ATOMIC32-LABEL: rmw32:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: .save {r7, lr}
+; ATOMIC32-NEXT: push {r7, lr}
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: movs r1, #1
+; ATOMIC32-NEXT: bl __sync_fetch_and_add_4
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: pop {r7, pc}
+ %v = atomicrmw add ptr %p, i32 1 seq_cst, align 4
+ ret i32 %v
+}
+
+define i32 @cmpxchg32(ptr %p) {
+; NO-ATOMIC32-LABEL: cmpxchg32:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r7, lr}
+; NO-ATOMIC32-NEXT: push {r7, lr}
+; NO-ATOMIC32-NEXT: .pad #8
+; NO-ATOMIC32-NEXT: sub sp, #8
+; NO-ATOMIC32-NEXT: movs r1, #0
+; NO-ATOMIC32-NEXT: str r1, [sp, #4]
+; NO-ATOMIC32-NEXT: movs r3, #5
+; NO-ATOMIC32-NEXT: str r3, [sp]
+; NO-ATOMIC32-NEXT: add r1, sp, #4
+; NO-ATOMIC32-NEXT: movs r2, #1
+; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_4
+; NO-ATOMIC32-NEXT: ldr r0, [sp, #4]
+; NO-ATOMIC32-NEXT: add sp, #8
+; NO-ATOMIC32-NEXT: pop {r7, pc}
+;
+; ATOMIC32-LABEL: cmpxchg32:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: .save {r7, lr}
+; ATOMIC32-NEXT: push {r7, lr}
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: movs r1, #0
+; ATOMIC32-NEXT: movs r2, #1
+; ATOMIC32-NEXT: bl __sync_val_compare_and_swap_4
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: pop {r7, pc}
+ %res = cmpxchg ptr %p, i32 0, i32 1 seq_cst seq_cst
+ %res.0 = extractvalue { i32, i1 } %res, 0
+ ret i32 %res.0
+}
+
+define i64 @load64(ptr %p) {
+; CHECK-LABEL: load64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: mov r1, r0
+; CHECK-NEXT: movs r0, #8
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: movs r3, #5
+; CHECK-NEXT: bl __atomic_load
+; CHECK-NEXT: ldr r1, [sp, #4]
+; CHECK-NEXT: ldr r0, [sp]
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r7, pc}
+ %v = load atomic i64, ptr %p seq_cst, align 4
+ ret i64 %v
+}
+
+define void @store64(ptr %p) {
+; CHECK-LABEL: store64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: mov r1, r0
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: str r0, [sp, #4]
+; CHECK-NEXT: str r0, [sp]
+; CHECK-NEXT: movs r0, #8
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: movs r3, #5
+; CHECK-NEXT: bl __atomic_store
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r7, pc}
+ store atomic i64 0, ptr %p seq_cst, align 4
+ ret void
+}
+
+define i64 @rmw64(ptr %p) {
+; CHECK-LABEL: rmw64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: .pad #24
+; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: ldr r0, [r0]
+; CHECK-NEXT: ldr r1, [r4, #4]
+; CHECK-NEXT: .LBB6_1: @ %atomicrmw.start
+; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: str r0, [sp, #16]
+; CHECK-NEXT: str r1, [sp, #20]
+; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: adds r0, r0, #1
+; CHECK-NEXT: adcs r2, r1
+; CHECK-NEXT: str r2, [sp, #12]
+; CHECK-NEXT: str r0, [sp, #8]
+; CHECK-NEXT: movs r0, #5
+; CHECK-NEXT: str r0, [sp]
+; CHECK-NEXT: str r0, [sp, #4]
+; CHECK-NEXT: movs r0, #8
+; CHECK-NEXT: add r2, sp, #16
+; CHECK-NEXT: add r3, sp, #8
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: bl __atomic_compare_exchange
+; CHECK-NEXT: mov r2, r0
+; CHECK-NEXT: ldr r1, [sp, #20]
+; CHECK-NEXT: ldr r0, [sp, #16]
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: beq .LBB6_1
+; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end
+; CHECK-NEXT: add sp, #24
+; CHECK-NEXT: pop {r4, pc}
+ %v = atomicrmw add ptr %p, i64 1 seq_cst, align 4
+ ret i64 %v
+}
+
+define i64 @cmpxchg64(ptr %p) {
+; CHECK-LABEL: cmpxchg64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: movs r3, #0
+; CHECK-NEXT: str r3, [sp, #12]
+; CHECK-NEXT: str r3, [sp, #8]
+; CHECK-NEXT: movs r1, #5
+; CHECK-NEXT: str r1, [sp]
+; CHECK-NEXT: str r1, [sp, #4]
+; CHECK-NEXT: add r1, sp, #8
+; CHECK-NEXT: movs r2, #1
+; CHECK-NEXT: bl __atomic_compare_exchange_8
+; CHECK-NEXT: ldr r1, [sp, #12]
+; CHECK-NEXT: ldr r0, [sp, #8]
+; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: pop {r7, pc}
+ %res = cmpxchg ptr %p, i64 0, i64 1 seq_cst seq_cst
+ %res.0 = extractvalue { i64, i1 } %res, 0
+ ret i64 %res.0
+}
--
2.38.1.431.g37b22c650d-goog