[Clang][AMDGPU] Lower `__amdgpu_texture_t` to `<8 x i32>` instead of `ptr addrspace(0)` (#187774)

Fix the IR lowering for `__amdgpu_texture_t` to generate a single
256-bit load instead of a double indirection through a flat pointer.

Previously, `__amdgpu_texture_t` was lowered to `ptr addrspace(0)` (a 64-bit flat
pointer), which forced a load of the pointer followed by a second load through it.
Using the same reproducer as in #187697:

```c
#define TSHARP __constant uint *

// Old tsharp handling:
// #define LOAD_TSHARP(I) *(__constant uint8 *)I

#define LOAD_TSHARP(I) *(__constant __amdgpu_texture_t *)I

float4 test_image_load_1D(TSHARP i, int c) {
  return __builtin_amdgcn_image_load_1d_v4f32_i32(15, c, LOAD_TSHARP(i), 0, 0);
}
```
Old output:

```llvm
define hidden <4 x float> @test_image_load_1D(ptr addrspace(4) noundef readonly captures(none) %i, i32 noundef %c) local_unnamed_addr #0 {
entry:
  %0 = load ptr, ptr addrspace(4) %i, align 32, !tbaa !9
  %1 = addrspacecast ptr %0 to ptr addrspace(1)
  %tex.rsrc.val = load <8 x i32>, ptr addrspace(1) %1, align 32
  %2 = tail call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 %c, <8 x i32> %tex.rsrc.val, i32 0, i32 0)
  ret <4 x float> %2
}
```
With this fix, the new output matches the old `__constant uint8 *` behavior:
```llvm
define hidden <4 x float> @test_image_load_1D(ptr addrspace(4) noundef readonly captures(none) %0, i32 noundef %1) local_unnamed_addr #0 {
  %3 = load <8 x i32>, ptr addrspace(4) %0, align 32, !tbaa !10
  %4 = tail call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 %1, <8 x i32> %3, i32 0, i32 0)
  ret <4 x float> %4
}
```
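For reference, a lit-style RUN line along the lines below should reproduce the IR above from the reproducer (this command is my assumption, modeled on the RUN lines in the tests updated by this patch; the PR itself does not give one, and the listings were taken after optimization):

```c
// Hypothetical reproduction setup, not part of the original report: save the
// reproducer above as a .cl file with this header and run it through lit, or
// invoke clang -cc1 directly with the same options.
// RUN: %clang_cc1 -triple amdgcn -cl-std=CL2.0 -O2 -emit-llvm -o - %s | FileCheck %s
//
// With the fix, the texture descriptor is loaded directly as a single 256-bit value
// and fed to the image intrinsic, with no intermediate pointer load:
// CHECK: load <8 x i32>, ptr addrspace(4)
// CHECK: call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(
```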

Fixes #187697
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index 6bd7905..29b4075 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -599,8 +599,13 @@
   } break;
 #include "clang/Basic/WebAssemblyReferenceTypes.def"
 #define AMDGPU_OPAQUE_PTR_TYPE(Name, Id, SingletonId, Width, Align, AS)        \
-  case BuiltinType::Id:                                                        \
-    return llvm::PointerType::get(getLLVMContext(), AS);
+  case BuiltinType::Id: {                                                      \
+    if (BuiltinType::Id == BuiltinType::AMDGPUTexture) {                       \
+      return llvm::FixedVectorType::get(                                       \
+          llvm::Type::getInt32Ty(getLLVMContext()), 8);                        \
+    }                                                                          \
+    return llvm::PointerType::get(getLLVMContext(), AS);                       \
+  }
 #define AMDGPU_NAMED_BARRIER_TYPE(Name, Id, SingletonId, Width, Align, Scope)  \
   case BuiltinType::Id:                                                        \
     return llvm::TargetExtType::get(getLLVMContext(), "amdgcn.named.barrier",  \
diff --git a/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c b/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c
index ef68c79..aba6c71 100644
--- a/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c
+++ b/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c
@@ -3,13 +3,13 @@
 // RUN: %clang_cc1 -triple amdgcn -emit-llvm -o - %s -debug-info-kind=limited | FileCheck %s
 
 // CHECK-LABEL: define dso_local void @test_locals(
-// CHECK-SAME: ) #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] {
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] !dbg [[DBG5:![0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[IMG:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[IMG:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[IMG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IMG]] to ptr
-// CHECK-NEXT:      #dbg_declare(ptr addrspace(5) [[IMG]], [[META11:![0-9]+]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), [[META14:![0-9]+]])
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[IMG_ASCAST]], align 32, !dbg [[DBG15:![0-9]+]]
-// CHECK-NEXT:    ret void, !dbg [[DBG16:![0-9]+]]
+// CHECK-NEXT:      #dbg_declare(ptr addrspace(5) [[IMG]], [[META10:![0-9]+]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), [[META13:![0-9]+]])
+// CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, ptr [[IMG_ASCAST]], align 32, !dbg [[DBG14:![0-9]+]]
+// CHECK-NEXT:    ret void, !dbg [[DBG15:![0-9]+]]
 //
 void test_locals(void) {
   __amdgpu_texture_t img;
diff --git a/clang/test/CodeGen/builtins-extended-image.c b/clang/test/CodeGen/builtins-extended-image.c
index 9ac7ec42..f487710 100644
--- a/clang/test/CodeGen/builtins-extended-image.c
+++ b/clang/test/CodeGen/builtins-extended-image.c
@@ -6,12 +6,12 @@
 typedef _Float16 half4 __attribute__((ext_vector_type(4)));
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -21,14 +21,13 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP4]]
 //
 float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -37,12 +36,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -52,14 +51,13 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 2, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 2, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP4]]
 //
 float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -68,12 +66,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -83,14 +81,13 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 4, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 4, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP4]]
 //
 float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -99,12 +96,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_a(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -114,14 +111,13 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 8, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 8, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP4]]
 //
 float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_a(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -130,12 +126,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_1d_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -145,13 +141,12 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], <8 x i32> [[TMP1]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP3]]
 //
 float4 test_amdgcn_image_sample_lz_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -160,12 +155,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_1d_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -175,14 +170,13 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP4]]
 //
 float4 test_amdgcn_image_sample_l_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -191,12 +185,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_1d_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -206,15 +200,14 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP5]]
 //
 float4 test_amdgcn_image_sample_d_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -223,12 +216,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_2d_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -238,14 +231,13 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP4]]
 //
 float4 test_amdgcn_image_sample_lz_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -254,12 +246,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_2d_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -269,15 +261,14 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32.v8i32.v4i32(i32 10, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32.v8i32.v4i32(i32 10, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP5]]
 //
 float4 test_amdgcn_image_sample_l_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -286,12 +277,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_2d_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -301,7 +292,7 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
@@ -309,10 +300,9 @@
 // CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP6]], align 32
+// CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TMP6]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP8]]
 //
 float4 test_amdgcn_image_sample_d_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -320,12 +310,12 @@
   return __builtin_amdgcn_image_sample_d_2d_v4f32_f32(15, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110);
 }
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_3d_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -335,15 +325,14 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.3d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.3d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP5]]
 //
 float4 test_amdgcn_image_sample_lz_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -352,12 +341,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_3d_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -367,16 +356,15 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.3d.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.3d.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TMP4]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP6]]
 //
 float4 test_amdgcn_image_sample_l_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -385,12 +373,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_3d_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -400,7 +388,7 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
@@ -411,10 +399,9 @@
 // CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP9]], align 32
+// CHECK-NEXT:    [[TMP9:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP11:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], float [[TMP7]], float [[TMP8]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP10]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP11:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], float [[TMP7]], float [[TMP8]], <8 x i32> [[TMP9]], <4 x i32> [[TMP10]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP11]]
 //
 float4 test_amdgcn_image_sample_d_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -423,12 +410,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_cube_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -438,15 +425,14 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.cube.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.cube.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP5]]
 //
 float4 test_amdgcn_image_sample_lz_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -455,12 +441,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_cube_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -470,16 +456,15 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.cube.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.cube.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TMP4]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP6]]
 //
 float4 test_amdgcn_image_sample_l_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -488,12 +473,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_1darray_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -503,14 +488,13 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP4]]
 //
 float4 test_amdgcn_image_sample_lz_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -519,12 +503,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_1darray_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -534,15 +518,14 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP5]]
 //
 float4 test_amdgcn_image_sample_l_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -551,12 +534,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_1darray_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -566,16 +549,15 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1darray.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1darray.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TMP4]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP6]]
 //
 float4 test_amdgcn_image_sample_d_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -584,12 +566,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_2darray_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -599,15 +581,14 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP5]]
 //
 float4 test_amdgcn_image_sample_lz_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -616,12 +597,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_2darray_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -631,16 +612,15 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TMP4]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP6]]
 //
 float4 test_amdgcn_image_sample_l_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -649,12 +629,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_2darray_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -664,7 +644,7 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
@@ -673,10 +653,9 @@
 // CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32
+// CHECK-NEXT:    [[TMP7:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2darray.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP9:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2darray.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TMP7]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP9]]
 //
 float4 test_amdgcn_image_sample_d_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -685,12 +664,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_1d_v4f16_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -700,13 +679,12 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.1d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.1d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], <8 x i32> [[TMP1]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP3]]
 //
 half4 test_amdgcn_image_sample_lz_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -715,12 +693,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_1d_v4f16_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -730,14 +708,13 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.1d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.1d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP4]]
 //
 half4 test_amdgcn_image_sample_l_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -746,12 +723,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_1d_v4f16_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -761,15 +738,14 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP5]]
 //
 half4 test_amdgcn_image_sample_d_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -778,12 +754,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_2d_v4f16_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -793,14 +769,13 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP4]]
 //
 half4 test_amdgcn_image_sample_lz_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -809,12 +784,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_2d_v4f16_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -824,15 +799,14 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP5]]
 //
 half4 test_amdgcn_image_sample_l_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -841,12 +815,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_2d_v4f16_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -856,7 +830,7 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
@@ -864,10 +838,9 @@
 // CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP6]], align 32
+// CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP8:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.2d.v4f16.f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP8:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.2d.v4f16.f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TMP6]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP8]]
 //
 half4 test_amdgcn_image_sample_d_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -876,12 +849,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_3d_v4f16_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -891,15 +864,14 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.3d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.3d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP5]]
 //
 half4 test_amdgcn_image_sample_lz_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -908,12 +880,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_3d_v4f16_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -923,16 +895,15 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.3d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.3d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TMP4]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP6]]
 //
 half4 test_amdgcn_image_sample_l_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -941,12 +912,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_3d_v4f16_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -956,7 +927,7 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
@@ -967,10 +938,9 @@
 // CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP9]], align 32
+// CHECK-NEXT:    [[TMP9:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP11:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.3d.v4f16.f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], float [[TMP7]], float [[TMP8]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP10]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP11:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.3d.v4f16.f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], float [[TMP7]], float [[TMP8]], <8 x i32> [[TMP9]], <4 x i32> [[TMP10]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP11]]
 //
 half4 test_amdgcn_image_sample_d_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -979,12 +949,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_cube_v4f16_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -994,15 +964,14 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.cube.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.cube.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP5]]
 //
 half4 test_amdgcn_image_sample_lz_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -1011,12 +980,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_cube_v4f16_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -1026,16 +995,15 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.cube.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.cube.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TMP4]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP6]]
 //
 half4 test_amdgcn_image_sample_l_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -1044,12 +1012,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_1darray_v4f16_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -1059,14 +1027,13 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.1darray.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.1darray.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP4]]
 //
 half4 test_amdgcn_image_sample_lz_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -1075,12 +1042,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_1darray_v4f16_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -1090,15 +1057,14 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.1darray.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.1darray.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP5]]
 //
 half4 test_amdgcn_image_sample_l_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -1107,12 +1073,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_1darray_v4f16_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -1122,16 +1088,15 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TMP4]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP6]]
 //
 half4 test_amdgcn_image_sample_d_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -1140,12 +1105,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_2darray_v4f16_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -1155,15 +1120,14 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2darray.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2darray.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP5]]
 //
 half4 test_amdgcn_image_sample_lz_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -1172,12 +1136,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_2darray_v4f16_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -1187,16 +1151,15 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.2darray.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.2darray.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TMP4]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP6]]
 //
 half4 test_amdgcn_image_sample_l_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -1205,12 +1168,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_2darray_v4f16_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -1220,7 +1183,7 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
@@ -1229,10 +1192,9 @@
 // CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32
+// CHECK-NEXT:    [[TMP7:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.2darray.v4f16.f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP9:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.2darray.v4f16.f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TMP7]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP9]]
 //
 half4 test_amdgcn_image_sample_d_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -1241,12 +1203,12 @@
 }
 
 // CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_lz_2d_f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -1256,14 +1218,13 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret float [[TMP4]]
 //
 float test_amdgcn_image_sample_lz_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -1272,12 +1233,12 @@
 }
 
 // CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_l_2d_f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -1287,15 +1248,14 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.l.2d.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.l.2d.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret float [[TMP5]]
 //
 float test_amdgcn_image_sample_l_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -1304,12 +1264,12 @@
 }
 
 // CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_d_2d_f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -1319,7 +1279,7 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
@@ -1327,10 +1287,9 @@
 // CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP6]], align 32
+// CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP8:%.*]] = call float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP8:%.*]] = call float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TMP6]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret float [[TMP8]]
 //
 float test_amdgcn_image_sample_d_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -1339,12 +1298,12 @@
 }
 
 // CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_lz_2darray_f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -1354,15 +1313,14 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.lz.2darray.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.lz.2darray.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret float [[TMP5]]
 //
 float test_amdgcn_image_sample_lz_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -1371,12 +1329,12 @@
 }
 
 // CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_l_2darray_f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -1386,16 +1344,15 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP6:%.*]] = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP6:%.*]] = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TMP4]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret float [[TMP6]]
 //
 float test_amdgcn_image_sample_l_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -1404,12 +1361,12 @@
 }
 
 // CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_d_2darray_f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -1419,7 +1376,7 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
@@ -1428,10 +1385,9 @@
 // CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32
+// CHECK-NEXT:    [[TMP7:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = call float @llvm.amdgcn.image.sample.d.2darray.f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP9:%.*]] = call float @llvm.amdgcn.image.sample.d.2darray.f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TMP7]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret float [[TMP9]]
 //
 float test_amdgcn_image_sample_d_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
diff --git a/clang/test/CodeGen/builtins-image-load.c b/clang/test/CodeGen/builtins-image-load.c
index a6b8458..102ba50 100644
--- a/clang/test/CodeGen/builtins-image-load.c
+++ b/clang/test/CodeGen/builtins-image-load.c
@@ -7,22 +7,21 @@
 typedef half half4 __attribute__((ext_vector_type(4)));
 
 // CHECK-LABEL: define dso_local float @test_builtin_image_load_2d(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
-// CHECK-NEXT:    [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 2, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 106, i32 103)
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 2, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 106, i32 103)
 // CHECK-NEXT:    ret float [[TMP3]]
 //
 float test_builtin_image_load_2d(float f32, int i32, __amdgpu_texture_t tex) {
@@ -31,22 +30,21 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
-// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP3]]
 //
 float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -55,22 +53,21 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
-// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP3]]
 //
 half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -79,23 +76,22 @@
 }
 
 // CHECK-LABEL: define dso_local float @test_builtin_image_load_2darray(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 4, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 4, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
 // CHECK-NEXT:    ret float [[TMP4]]
 //
 float test_builtin_image_load_2darray(float f32, int i32, __amdgpu_texture_t tex) {
@@ -104,23 +100,22 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP4]]
 //
 float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -129,23 +124,22 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP4]]
 //
 half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -154,21 +148,20 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], <8 x i32> [[TMP1]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP2]]
 //
 float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -177,21 +170,20 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], <8 x i32> [[TMP1]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP2]]
 //
 half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -200,22 +192,21 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
-// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP3]]
 //
 float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -224,22 +215,21 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
-// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP3]]
 //
 half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -248,23 +238,22 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_3d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP4]]
 //
 float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -273,23 +262,22 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP4]]
 //
 half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -298,23 +286,22 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_cube_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP4]]
 //
 float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -323,23 +310,22 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP4]]
 //
 half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -348,22 +334,21 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
-// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP3]]
 //
 float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -372,22 +357,21 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
-// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP3]]
 //
 half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -396,23 +380,22 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP4]]
 //
 float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -421,23 +404,22 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP4]]
 //
 half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -446,23 +428,22 @@
 }
 
 // CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2d(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.mip.2d.f32.i32.v8i32(i32 8, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.mip.2d.f32.i32.v8i32(i32 8, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
 // CHECK-NEXT:    ret float [[TMP4]]
 //
 float test_builtin_image_load_mip_2d(float f32, int i32, __amdgpu_texture_t tex) {
@@ -471,23 +452,22 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP4]]
 //
 float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -496,23 +476,22 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP4]]
 //
 half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -521,24 +500,23 @@
 }
 
 // CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2darray(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    [[TMP5:%.*]] = call float @llvm.amdgcn.image.load.mip.2darray.f32.i32.v8i32(i32 8, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP5:%.*]] = call float @llvm.amdgcn.image.load.mip.2darray.f32.i32.v8i32(i32 8, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret float [[TMP5]]
 //
 float test_builtin_image_load_mip_2darray(float f32, int i32, __amdgpu_texture_t tex) {
@@ -547,24 +525,23 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP5]]
 //
 float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -573,24 +550,23 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP5]]
 //
 half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -599,24 +575,23 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_3d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP5]]
 //
 float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -625,24 +600,23 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP5]]
 //
 half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -651,24 +625,23 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_cube_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP5]]
 //
 float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -677,24 +650,23 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP5]]
 //
 half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -703,12 +675,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_1d_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -718,13 +690,12 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], <8 x i32> [[TMP1]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP3]]
 //
 float4 test_builtin_amdgcn_image_sample_1d_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -732,12 +703,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_1d_v4f16_f32(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -747,13 +718,12 @@
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], <8 x i32> [[TMP1]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP3]]
 //
 half4 test_builtin_amdgcn_image_sample_1d_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -761,11 +731,11 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_1darray_v4f32_f32(
-// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -773,14 +743,13 @@
 // CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP4]]
 //
 float4 test_builtin_amdgcn_image_sample_1darray_v4f32_f32(int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -788,12 +757,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_1darray_v4f16_f32(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -803,14 +772,13 @@
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1darray.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1darray.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP4]]
 //
 half4 test_builtin_amdgcn_image_sample_1darray_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -818,11 +786,11 @@
 }
 
 // CHECK-LABEL: define dso_local float @test_builtin_amdgcn_image_sample_2d_f32_f32(
-// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -830,14 +798,13 @@
 // CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret float [[TMP4]]
 //
 float test_builtin_amdgcn_image_sample_2d_f32_f32(int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -845,12 +812,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_2d_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -860,14 +827,13 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP4]]
 //
 float4 test_builtin_amdgcn_image_sample_2d_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -875,12 +841,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_2d_v4f16_f32(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -890,14 +856,13 @@
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP4]]
 //
 half4 test_builtin_amdgcn_image_sample_2d_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -905,11 +870,11 @@
 }
 
 // CHECK-LABEL: define dso_local float @test_builtin_amdgcn_image_sample_2darray_f32_f32(
-// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
@@ -917,15 +882,14 @@
 // CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.2darray.f32.f32.v8i32.v4i32(i32 4, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.2darray.f32.f32.v8i32.v4i32(i32 4, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret float [[TMP5]]
 //
 float test_builtin_amdgcn_image_sample_2darray_f32_f32(int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -933,12 +897,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_2darray_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -948,15 +912,14 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP5]]
 //
 float4 test_builtin_amdgcn_image_sample_2darray_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -964,12 +927,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_2darray_v4f16_f32(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -979,15 +942,14 @@
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2darray.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2darray.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP5]]
 //
 half4 test_builtin_amdgcn_image_sample_2darray_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -995,12 +957,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_3d_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1010,15 +972,14 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP5]]
 //
 float4 test_builtin_amdgcn_image_sample_3d_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -1026,12 +987,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_3d_v4f16_f32(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1041,15 +1002,14 @@
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.3d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.3d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP5]]
 //
 half4 test_builtin_amdgcn_image_sample_3d_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -1057,12 +1017,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_cube_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1072,15 +1032,14 @@
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x float> [[TMP5]]
 //
 float4 test_builtin_amdgcn_image_sample_cube_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
@@ -1088,12 +1047,12 @@
 }
 
 // CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_cube_v4f16_f32(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
@@ -1103,15 +1062,14 @@
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.cube.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.cube.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
 // CHECK-NEXT:    ret <4 x half> [[TMP5]]
 //
 half4 test_builtin_amdgcn_image_sample_cube_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
diff --git a/clang/test/CodeGen/builtins-image-store.c b/clang/test/CodeGen/builtins-image-store.c
index 9a56202..a9953c5 100644
--- a/clang/test/CodeGen/builtins-image-store.c
+++ b/clang/test/CodeGen/builtins-image-store.c
@@ -6,23 +6,22 @@
 typedef half half4 __attribute__((ext_vector_type(4)));
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_2d(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.2d.f32.i32.v8i32(float [[TMP0]], i32 12, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 106, i32 103)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.2d.f32.i32.v8i32(float [[TMP0]], i32 12, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 106, i32 103)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_2d(float f32, int i32, __amdgpu_texture_t tex) {
@@ -31,23 +30,22 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_2d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -56,23 +54,22 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_2d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.2d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.2d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -81,24 +78,23 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.2darray.f32.i32.v8i32(float [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.2darray.f32.i32.v8i32(float [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_2darray(float f32, int i32, __amdgpu_texture_t tex) {
@@ -107,24 +103,23 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -133,24 +128,23 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.2darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.2darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -159,22 +153,21 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_1d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -183,22 +176,21 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_1d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -207,23 +199,22 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_1darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -232,23 +223,22 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_1darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -257,24 +247,23 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_3d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.3d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.3d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -283,24 +272,23 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_3d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.3d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.3d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -309,24 +297,23 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_cube_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.cube.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.cube.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -335,24 +322,23 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_cube_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.cube.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.cube.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -361,23 +347,22 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -386,23 +371,22 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -411,24 +395,23 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -437,24 +420,23 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -463,24 +445,23 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.2d.f32.i32.v8i32(float [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.2d.f32.i32.v8i32(float [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_mip_2d(float f32, int i32, __amdgpu_texture_t tex) {
@@ -489,24 +470,23 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -515,24 +495,23 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.2d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.2d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -541,25 +520,24 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.2darray.f32.i32.v8i32(float [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.2darray.f32.i32.v8i32(float [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_mip_2darray(float f32, int i32, __amdgpu_texture_t tex) {
@@ -568,25 +546,24 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -595,25 +572,24 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.2darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.2darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -622,25 +598,24 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_3d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -649,25 +624,24 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_3d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.3d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.3d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
@@ -676,25 +650,24 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_cube_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_mip_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
@@ -703,25 +676,24 @@
  }
 
 // CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_cube_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> [[TEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
 // CHECK-NEXT:    [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
 // CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
 // CHECK-NEXT:    store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    store <8 x i32> [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
-// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32
-// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.cube.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT:    call void @llvm.amdgcn.image.store.mip.cube.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 15, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
 // CHECK-NEXT:    ret void
 //
 void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {