// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm -o - %s | FileCheck %s

// Unsigned scalar types used as return types by the b8/b16/b32 tests below.
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;

// 2-, 3- and 4-element vectors of u32, used as return types by the
// b64/b96/b128 tests below.
typedef u32 v2u32 __attribute__((ext_vector_type(2)));
typedef u32 v3u32 __attribute__((ext_vector_type(3)));
typedef u32 v4u32 __attribute__((ext_vector_type(4)));

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    ret i8 [[TMP0]]
//
u8 test_amdgcn_raw_ptr_buffer_load_b8(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
  // All-constant case: every operand after rsrc is the literal 0, so the
  // offset and soffset parameters are not referenced.
  return __builtin_amdgcn_raw_buffer_load_b8(
      rsrc,
      /*offset=*/0,
      /*soffset=*/0,
      /*aux=*/0);
}

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    ret i16 [[TMP0]]
//
u16 test_amdgcn_raw_ptr_buffer_load_b16(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
  // All-constant case: every operand after rsrc is the literal 0, so the
  // offset and soffset parameters are not referenced.
  return __builtin_amdgcn_raw_buffer_load_b16(
      rsrc,
      /*offset=*/0,
      /*soffset=*/0,
      /*aux=*/0);
}

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    ret i32 [[TMP0]]
//
u32 test_amdgcn_raw_ptr_buffer_load_b32(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
  // All-constant case: every operand after rsrc is the literal 0, so the
  // offset and soffset parameters are not referenced.
  return __builtin_amdgcn_raw_buffer_load_b32(
      rsrc,
      /*offset=*/0,
      /*soffset=*/0,
      /*aux=*/0);
}

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    ret <2 x i32> [[TMP0]]
//
v2u32 test_amdgcn_raw_ptr_buffer_load_b64(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
  // All-constant case: every operand after rsrc is the literal 0, so the
  // offset and soffset parameters are not referenced.
  return __builtin_amdgcn_raw_buffer_load_b64(
      rsrc,
      /*offset=*/0,
      /*soffset=*/0,
      /*aux=*/0);
}

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    ret <3 x i32> [[TMP0]]
//
v3u32 test_amdgcn_raw_ptr_buffer_load_b96(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
  // All-constant case: every operand after rsrc is the literal 0, so the
  // offset and soffset parameters are not referenced.
  return __builtin_amdgcn_raw_buffer_load_b96(
      rsrc,
      /*offset=*/0,
      /*soffset=*/0,
      /*aux=*/0);
}

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v4u32 test_amdgcn_raw_ptr_buffer_load_b128(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
  // All-constant case: every operand after rsrc is the literal 0, so the
  // offset and soffset parameters are not referenced.
  return __builtin_amdgcn_raw_buffer_load_b128(
      rsrc,
      /*offset=*/0,
      /*soffset=*/0,
      /*aux=*/0);
}

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8_non_const_offset(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
// CHECK-NEXT:    ret i8 [[TMP0]]
//
u8 test_amdgcn_raw_ptr_buffer_load_b8_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
  // Only the offset operand comes from a parameter; soffset and aux are the
  // literal 0, so the soffset parameter is not referenced.
  return __builtin_amdgcn_raw_buffer_load_b8(
      rsrc,
      offset,
      /*soffset=*/0,
      /*aux=*/0);
}

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16_non_const_offset(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
// CHECK-NEXT:    ret i16 [[TMP0]]
//
u16 test_amdgcn_raw_ptr_buffer_load_b16_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
  // Only the offset operand comes from a parameter; soffset and aux are the
  // literal 0, so the soffset parameter is not referenced.
  return __builtin_amdgcn_raw_buffer_load_b16(
      rsrc,
      offset,
      /*soffset=*/0,
      /*aux=*/0);
}

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32_non_const_offset(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
// CHECK-NEXT:    ret i32 [[TMP0]]
//
u32 test_amdgcn_raw_ptr_buffer_load_b32_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
  // Only the offset operand comes from a parameter; soffset and aux are the
  // literal 0, so the soffset parameter is not referenced.
  return __builtin_amdgcn_raw_buffer_load_b32(
      rsrc,
      offset,
      /*soffset=*/0,
      /*aux=*/0);
}

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64_non_const_offset(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
// CHECK-NEXT:    ret <2 x i32> [[TMP0]]
//
v2u32 test_amdgcn_raw_ptr_buffer_load_b64_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
  // Only the offset operand comes from a parameter; soffset and aux are the
  // literal 0, so the soffset parameter is not referenced.
  return __builtin_amdgcn_raw_buffer_load_b64(
      rsrc,
      offset,
      /*soffset=*/0,
      /*aux=*/0);
}

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96_non_const_offset(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
// CHECK-NEXT:    ret <3 x i32> [[TMP0]]
//
v3u32 test_amdgcn_raw_ptr_buffer_load_b96_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
  // Only the offset operand comes from a parameter; soffset and aux are the
  // literal 0, so the soffset parameter is not referenced.
  return __builtin_amdgcn_raw_buffer_load_b96(
      rsrc,
      offset,
      /*soffset=*/0,
      /*aux=*/0);
}

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128_non_const_offset(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v4u32 test_amdgcn_raw_ptr_buffer_load_b128_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
  // Only the offset operand comes from a parameter; soffset and aux are the
  // literal 0, so the soffset parameter is not referenced.
  return __builtin_amdgcn_raw_buffer_load_b128(
      rsrc,
      offset,
      /*soffset=*/0,
      /*aux=*/0);
}

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8_non_const_soffset(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
// CHECK-NEXT:    ret i8 [[TMP0]]
//
u8 test_amdgcn_raw_ptr_buffer_load_b8_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
  // Only the soffset operand comes from a parameter; offset and aux are the
  // literal 0, so the offset parameter is not referenced.
  return __builtin_amdgcn_raw_buffer_load_b8(
      rsrc,
      /*offset=*/0,
      soffset,
      /*aux=*/0);
}

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16_non_const_soffset(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
// CHECK-NEXT:    ret i16 [[TMP0]]
//
u16 test_amdgcn_raw_ptr_buffer_load_b16_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
  // Only the soffset operand comes from a parameter; offset and aux are the
  // literal 0, so the offset parameter is not referenced.
  return __builtin_amdgcn_raw_buffer_load_b16(
      rsrc,
      /*offset=*/0,
      soffset,
      /*aux=*/0);
}

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32_non_const_soffset(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
// CHECK-NEXT:    ret i32 [[TMP0]]
//
u32 test_amdgcn_raw_ptr_buffer_load_b32_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
  // Only the soffset operand comes from a parameter; offset and aux are the
  // literal 0, so the offset parameter is not referenced.
  return __builtin_amdgcn_raw_buffer_load_b32(
      rsrc,
      /*offset=*/0,
      soffset,
      /*aux=*/0);
}

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64_non_const_soffset(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
// CHECK-NEXT:    ret <2 x i32> [[TMP0]]
//
v2u32 test_amdgcn_raw_ptr_buffer_load_b64_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
  // Only the soffset operand comes from a parameter; offset and aux are the
  // literal 0, so the offset parameter is not referenced.
  return __builtin_amdgcn_raw_buffer_load_b64(
      rsrc,
      /*offset=*/0,
      soffset,
      /*aux=*/0);
}

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96_non_const_soffset(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
// CHECK-NEXT:    ret <3 x i32> [[TMP0]]
//
v3u32 test_amdgcn_raw_ptr_buffer_load_b96_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
  // Only the soffset operand comes from a parameter; offset and aux are the
  // literal 0, so the offset parameter is not referenced.
  return __builtin_amdgcn_raw_buffer_load_b96(
      rsrc,
      /*offset=*/0,
      soffset,
      /*aux=*/0);
}

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128_non_const_soffset(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v4u32 test_amdgcn_raw_ptr_buffer_load_b128_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
  // Only the soffset operand comes from a parameter; offset and aux are the
  // literal 0, so the offset parameter is not referenced.
  return __builtin_amdgcn_raw_buffer_load_b128(
      rsrc,
      /*offset=*/0,
      soffset,
      /*aux=*/0);
}

// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_lds(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 1, i32 [[OFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3)
// CHECK-NEXT:    ret void
//
void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int offset, int soffset) {
  // rsrc, lds, offset and soffset are forwarded from the parameters; the
  // third operand and the last two operands are the literals 1, 2 and 3.
  // NOTE(review): going by the LLVM intrinsic definition these are presumably
  // the data size, immediate offset and aux operands -- confirm.
  __builtin_amdgcn_raw_ptr_buffer_load_lds(
      rsrc,
      lds,
      1,
      offset,
      soffset,
      2,
      3);
}
