| commit 1de10705594c7a2c9b8fde901c391bd84062ae04 |
| Author: David Green <david.green@arm.com> |
| Date: Fri Feb 28 18:31:45 2020 +0000 |
| |
| [DAGCombine] Fix alias analysis for unaligned accesses |
| |
| The alias analysis in DAG Combine looks at the BaseAlign, the Offset and |
| the Size of two accesses, and determines if they are known to access |
| different parts of memory by the fact that they are different offsets |
| from inside that "alignment window". It does not seem to account for |
| accesses that are not a multiple of the size, and may overflow from one |
| alignment window into another. |
| |
| For example in the test case we have a 19byte memset that is splits into |
| a 16 byte neon store and an unaligned 4 byte store with a 15 byte |
| offset. This 15byte offset (with a base align of 8) wraps around to the |
| next alignment windows. When compared to an access that is a 16byte |
| offset (of the same 4byte size and 8byte basealign), the two accesses |
| are said not to alias. |
| |
| I've fixed this here by just ensuring that the offsets are a multiple of |
| the size, ensuring that they don't overlap by wrapping. Fixes PR45035, |
| which was exposed by the UseAA changes in the arm backend. |
| |
| Differential Revision: https://reviews.llvm.org/D75238 |
| |
| diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |
| index e2d1c3c9b1b..f95519b8eba 100644 |
| --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |
| +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |
| @@ -21107,21 +21107,24 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { |
| // If we know required SrcValue1 and SrcValue2 have relatively large |
| // alignment compared to the size and offset of the access, we may be able |
| // to prove they do not alias. This check is conservative for now to catch |
| - // cases created by splitting vector types. |
| + // cases created by splitting vector types, it only works when the offsets are |
| + // multiples of the size of the data. |
| int64_t SrcValOffset0 = MUC0.MMO->getOffset(); |
| int64_t SrcValOffset1 = MUC1.MMO->getOffset(); |
| unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment(); |
| unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment(); |
| + auto &Size0 = MUC0.NumBytes; |
| + auto &Size1 = MUC1.NumBytes; |
| if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && |
| - MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() && |
| - *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) { |
| + Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 && |
| + OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 && |
| + SrcValOffset1 % *Size1 == 0) { |
| int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0; |
| int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1; |
| |
| // There is no overlap between these relatively aligned accesses of |
| // similar size. Return no alias. |
| - if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 || |
| - (OffAlign1 + *MUC1.NumBytes) <= OffAlign0) |
| + if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0) |
| return false; |
| } |
| |
| @@ -21134,11 +21137,12 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { |
| UseAA = false; |
| #endif |
| |
| - if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) { |
| + if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && |
| + Size0.hasValue() && Size1.hasValue()) { |
| // Use alias analysis information. |
| int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); |
| - int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset; |
| - int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset; |
| + int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset; |
| + int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset; |
| AliasResult AAResult = AA->alias( |
| MemoryLocation(MUC0.MMO->getValue(), Overlap0, |
| UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()), |
| diff --git a/llvm/test/CodeGen/ARM/memset-align.ll b/llvm/test/CodeGen/ARM/memset-align.ll |
| new file mode 100644 |
| index 00000000000..b8f42f8de9c |
| --- /dev/null |
| +++ b/llvm/test/CodeGen/ARM/memset-align.ll |
| @@ -0,0 +1,39 @@ |
| +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| +; RUN: llc < %s -mtriple=thumbv8-unknown-linux-android -o - | FileCheck %s |
| + |
| +%struct.af = type <{ i64, i64, i8, i8, i8, [5 x i8] }> |
| + |
| +define void @test() { |
| +; CHECK-LABEL: test: |
| +; CHECK: @ %bb.0: @ %entry |
| +; CHECK-NEXT: .save {r7, lr} |
| +; CHECK-NEXT: push {r7, lr} |
| +; CHECK-NEXT: .pad #24 |
| +; CHECK-NEXT: sub sp, #24 |
| +; CHECK-NEXT: mov r0, sp |
| +; CHECK-NEXT: mov.w r1, #-1 |
| +; CHECK-NEXT: vmov.i32 q8, #0x0 |
| +; CHECK-NEXT: movs r2, #15 |
| +; CHECK-NEXT: mov r3, r0 |
| +; CHECK-NEXT: strd r1, r1, [sp, #8] |
| +; CHECK-NEXT: strd r1, r1, [sp] |
| +; CHECK-NEXT: str r1, [sp, #16] |
| +; CHECK-NEXT: vst1.64 {d16, d17}, [r3], r2 |
| +; CHECK-NEXT: movs r2, #0 |
| +; CHECK-NEXT: str r2, [r3] |
| +; CHECK-NEXT: str r1, [sp, #20] |
| +; CHECK-NEXT: bl callee |
| +; CHECK-NEXT: add sp, #24 |
| +; CHECK-NEXT: pop {r7, pc} |
| +entry: |
| + %a = alloca %struct.af, align 8 |
| + %0 = bitcast %struct.af* %a to i8* |
| + %1 = bitcast %struct.af* %a to i8* |
| + call void @llvm.memset.p0i8.i64(i8* align 8 %1, i8 -1, i64 24, i1 false) |
| + call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 19, i1 false) |
| + call void @callee(%struct.af* %a) |
| + ret void |
| +} |
| + |
| +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) |
| +declare void @callee(%struct.af*) local_unnamed_addr #1 |