; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s

; Constant-index insert into <32 x i8>: lane 1 of a volatile-loaded vector is
; replaced by %in and the result is stored to %dst. The expected assembly does
; the insert with a single vinsgr2vr.b on $vr0 between the xvld and the xvst.
define void @insert_32xi8(ptr %src, ptr %dst, i8 %in) nounwind {
; CHECK-LABEL: insert_32xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvld $xr0, $a0, 0
; CHECK-NEXT:    vinsgr2vr.b $vr0, $a2, 1
; CHECK-NEXT:    xvst $xr0, $a1, 0
; CHECK-NEXT:    ret
  %vec = load volatile <32 x i8>, ptr %src
  %res = insertelement <32 x i8> %vec, i8 %in, i32 1
  store <32 x i8> %res, ptr %dst
  ret void
}

; Constant-index insert into <32 x i8> at lane 16, the first i8 lane past the
; low 128 bits (16 lanes x 8 bits). The expected assembly is no longer a single
; insert: $xr0 is duplicated into $xr1 (xvori.b with immediate 0), xvpermi.q is
; applied, the scalar is inserted at lane 0 of $vr1, and a second xvpermi.q
; merges the result back into $xr0 before the store.
define void @insert_32xi8_upper(ptr %src, ptr %dst, i8 %in) nounwind {
; CHECK-LABEL: insert_32xi8_upper:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvld $xr0, $a0, 0
; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
; CHECK-NEXT:    vinsgr2vr.b $vr1, $a2, 0
; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT:    xvst $xr0, $a1, 0
; CHECK-NEXT:    ret
  %orig = load volatile <32 x i8>, ptr %src
  %updated = insertelement <32 x i8> %orig, i8 %in, i32 16
  store <32 x i8> %updated, ptr %dst
  ret void
}

; Constant-index insert into <16 x i16>: lane 1 of a volatile-loaded vector is
; replaced by %in and the result is stored to %dst. The expected assembly does
; the insert with a single vinsgr2vr.h on $vr0.
define void @insert_16xi16(ptr %src, ptr %dst, i16 %in) nounwind {
; CHECK-LABEL: insert_16xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvld $xr0, $a0, 0
; CHECK-NEXT:    vinsgr2vr.h $vr0, $a2, 1
; CHECK-NEXT:    xvst $xr0, $a1, 0
; CHECK-NEXT:    ret
  %vec = load volatile <16 x i16>, ptr %src
  %res = insertelement <16 x i16> %vec, i16 %in, i32 1
  store <16 x i16> %res, ptr %dst
  ret void
}

; Constant-index insert into <16 x i16> at lane 8, the first i16 lane past the
; low 128 bits (8 lanes x 16 bits). The expected assembly duplicates $xr0 into
; $xr1 (xvori.b with immediate 0), applies xvpermi.q, inserts the scalar at
; lane 0 of $vr1 with vinsgr2vr.h, and merges back with a second xvpermi.q.
define void @insert_16xi16_upper(ptr %src, ptr %dst, i16 %in) nounwind {
; CHECK-LABEL: insert_16xi16_upper:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvld $xr0, $a0, 0
; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
; CHECK-NEXT:    vinsgr2vr.h $vr1, $a2, 0
; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT:    xvst $xr0, $a1, 0
; CHECK-NEXT:    ret
  %orig = load volatile <16 x i16>, ptr %src
  %updated = insertelement <16 x i16> %orig, i16 %in, i32 8
  store <16 x i16> %updated, ptr %dst
  ret void
}

; Constant-index insert into <8 x i32>: lane 1 of a volatile-loaded vector is
; replaced by %in and the result is stored to %dst. The expected assembly does
; the insert with a single xvinsgr2vr.w directly on $xr0.
define void @insert_8xi32(ptr %src, ptr %dst, i32 %in) nounwind {
; CHECK-LABEL: insert_8xi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvld $xr0, $a0, 0
; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a2, 1
; CHECK-NEXT:    xvst $xr0, $a1, 0
; CHECK-NEXT:    ret
  %vec = load volatile <8 x i32>, ptr %src
  %res = insertelement <8 x i32> %vec, i32 %in, i32 1
  store <8 x i32> %res, ptr %dst
  ret void
}

; Constant-index insert into <4 x i64>: lane 1 of a volatile-loaded vector is
; replaced by %in and the result is stored to %dst. The expected assembly does
; the insert with a single xvinsgr2vr.d directly on $xr0.
define void @insert_4xi64(ptr %src, ptr %dst, i64 %in) nounwind {
; CHECK-LABEL: insert_4xi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvld $xr0, $a0, 0
; CHECK-NEXT:    xvinsgr2vr.d $xr0, $a2, 1
; CHECK-NEXT:    xvst $xr0, $a1, 0
; CHECK-NEXT:    ret
  %vec = load volatile <4 x i64>, ptr %src
  %res = insertelement <4 x i64> %vec, i64 %in, i32 1
  store <4 x i64> %res, ptr %dst
  ret void
}

; Constant-index insert of a float into lane 1 of <8 x float>. In the expected
; assembly the scalar is first moved from $fa0 to a general-purpose register
; (movfr2gr.s) and then inserted with xvinsgr2vr.w; the vector itself is held
; in $xr1.
define void @insert_8xfloat(ptr %src, ptr %dst, float %in) nounwind {
; CHECK-LABEL: insert_8xfloat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvld $xr1, $a0, 0
; CHECK-NEXT:    movfr2gr.s $a0, $fa0
; CHECK-NEXT:    xvinsgr2vr.w $xr1, $a0, 1
; CHECK-NEXT:    xvst $xr1, $a1, 0
; CHECK-NEXT:    ret
  %fvec = load volatile <8 x float>, ptr %src
  %fres = insertelement <8 x float> %fvec, float %in, i32 1
  store <8 x float> %fres, ptr %dst
  ret void
}

; Constant-index insert of a double into lane 1 of <4 x double>. In the
; expected assembly the scalar is first moved from $fa0 to a general-purpose
; register (movfr2gr.d) and then inserted with xvinsgr2vr.d; the vector itself
; is held in $xr1.
define void @insert_4xdouble(ptr %src, ptr %dst, double %in) nounwind {
; CHECK-LABEL: insert_4xdouble:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvld $xr1, $a0, 0
; CHECK-NEXT:    movfr2gr.d $a0, $fa0
; CHECK-NEXT:    xvinsgr2vr.d $xr1, $a0, 1
; CHECK-NEXT:    xvst $xr1, $a1, 0
; CHECK-NEXT:    ret
  %dvec = load volatile <4 x double>, ptr %src
  %dres = insertelement <4 x double> %dvec, double %in, i32 1
  store <4 x double> %dres, ptr %dst
  ret void
}

; Variable-index insert into <32 x i8>. The expected assembly goes through
; memory instead of a vector insert instruction: a 96-byte frame is set up
; with $ra/$fp saved, the loaded vector is written to the slot at $sp+32, the
; slot address is combined with the index register $a3 by bstrins.d (bit range
; 4..0), %in is written there with st.b, and the whole vector is reloaded from
; the slot and stored to %dst.
define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_32xi8_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi.d $sp, $sp, -96
; CHECK-NEXT:    st.d $ra, $sp, 88 # 8-byte Folded Spill
; CHECK-NEXT:    st.d $fp, $sp, 80 # 8-byte Folded Spill
; CHECK-NEXT:    addi.d $fp, $sp, 96
; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT:    xvld $xr0, $a0, 0
; CHECK-NEXT:    xvst $xr0, $sp, 32
; CHECK-NEXT:    addi.d $a0, $sp, 32
; CHECK-NEXT:    bstrins.d $a0, $a3, 4, 0
; CHECK-NEXT:    st.b $a2, $a0, 0
; CHECK-NEXT:    xvld $xr0, $sp, 32
; CHECK-NEXT:    xvst $xr0, $a1, 0
; CHECK-NEXT:    addi.d $sp, $fp, -96
; CHECK-NEXT:    ld.d $fp, $sp, 80 # 8-byte Folded Reload
; CHECK-NEXT:    ld.d $ra, $sp, 88 # 8-byte Folded Reload
; CHECK-NEXT:    addi.d $sp, $sp, 96
; CHECK-NEXT:    ret
  %vec = load volatile <32 x i8>, ptr %src
  %res = insertelement <32 x i8> %vec, i8 %in, i32 %idx
  store <32 x i8> %res, ptr %dst
  ret void
}

; Variable-index insert into <16 x i16>. The expected assembly goes through
; memory: the loaded vector is written to the stack slot at $sp+32, the slot
; address is combined with the index register $a3 by bstrins.d (bit range
; 4..1, in line with 2-byte elements), %in is written there with st.h, and the
; whole vector is reloaded from the slot and stored to %dst.
define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_16xi16_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi.d $sp, $sp, -96
; CHECK-NEXT:    st.d $ra, $sp, 88 # 8-byte Folded Spill
; CHECK-NEXT:    st.d $fp, $sp, 80 # 8-byte Folded Spill
; CHECK-NEXT:    addi.d $fp, $sp, 96
; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT:    xvld $xr0, $a0, 0
; CHECK-NEXT:    xvst $xr0, $sp, 32
; CHECK-NEXT:    addi.d $a0, $sp, 32
; CHECK-NEXT:    bstrins.d $a0, $a3, 4, 1
; CHECK-NEXT:    st.h $a2, $a0, 0
; CHECK-NEXT:    xvld $xr0, $sp, 32
; CHECK-NEXT:    xvst $xr0, $a1, 0
; CHECK-NEXT:    addi.d $sp, $fp, -96
; CHECK-NEXT:    ld.d $fp, $sp, 80 # 8-byte Folded Reload
; CHECK-NEXT:    ld.d $ra, $sp, 88 # 8-byte Folded Reload
; CHECK-NEXT:    addi.d $sp, $sp, 96
; CHECK-NEXT:    ret
  %vec = load volatile <16 x i16>, ptr %src
  %res = insertelement <16 x i16> %vec, i16 %in, i32 %idx
  store <16 x i16> %res, ptr %dst
  ret void
}

; Variable-index insert into <8 x i32>. The expected assembly goes through
; memory: the loaded vector is written to the stack slot at $sp+32, the slot
; address is combined with the index register $a3 by bstrins.d (bit range
; 4..2, in line with 4-byte elements), %in is written there with st.w, and the
; whole vector is reloaded from the slot and stored to %dst.
define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_8xi32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi.d $sp, $sp, -96
; CHECK-NEXT:    st.d $ra, $sp, 88 # 8-byte Folded Spill
; CHECK-NEXT:    st.d $fp, $sp, 80 # 8-byte Folded Spill
; CHECK-NEXT:    addi.d $fp, $sp, 96
; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT:    xvld $xr0, $a0, 0
; CHECK-NEXT:    xvst $xr0, $sp, 32
; CHECK-NEXT:    addi.d $a0, $sp, 32
; CHECK-NEXT:    bstrins.d $a0, $a3, 4, 2
; CHECK-NEXT:    st.w $a2, $a0, 0
; CHECK-NEXT:    xvld $xr0, $sp, 32
; CHECK-NEXT:    xvst $xr0, $a1, 0
; CHECK-NEXT:    addi.d $sp, $fp, -96
; CHECK-NEXT:    ld.d $fp, $sp, 80 # 8-byte Folded Reload
; CHECK-NEXT:    ld.d $ra, $sp, 88 # 8-byte Folded Reload
; CHECK-NEXT:    addi.d $sp, $sp, 96
; CHECK-NEXT:    ret
  %vec = load volatile <8 x i32>, ptr %src
  %res = insertelement <8 x i32> %vec, i32 %in, i32 %idx
  store <8 x i32> %res, ptr %dst
  ret void
}

; Variable-index insert into <4 x i64>. The expected assembly goes through
; memory: the loaded vector is written to the stack slot at $sp+32, the slot
; address is combined with the index register $a3 by bstrins.d (bit range
; 4..3, in line with 8-byte elements), %in is written there with st.d, and the
; whole vector is reloaded from the slot and stored to %dst.
define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_4xi64_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi.d $sp, $sp, -96
; CHECK-NEXT:    st.d $ra, $sp, 88 # 8-byte Folded Spill
; CHECK-NEXT:    st.d $fp, $sp, 80 # 8-byte Folded Spill
; CHECK-NEXT:    addi.d $fp, $sp, 96
; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT:    xvld $xr0, $a0, 0
; CHECK-NEXT:    xvst $xr0, $sp, 32
; CHECK-NEXT:    addi.d $a0, $sp, 32
; CHECK-NEXT:    bstrins.d $a0, $a3, 4, 3
; CHECK-NEXT:    st.d $a2, $a0, 0
; CHECK-NEXT:    xvld $xr0, $sp, 32
; CHECK-NEXT:    xvst $xr0, $a1, 0
; CHECK-NEXT:    addi.d $sp, $fp, -96
; CHECK-NEXT:    ld.d $fp, $sp, 80 # 8-byte Folded Reload
; CHECK-NEXT:    ld.d $ra, $sp, 88 # 8-byte Folded Reload
; CHECK-NEXT:    addi.d $sp, $sp, 96
; CHECK-NEXT:    ret
  %vec = load volatile <4 x i64>, ptr %src
  %res = insertelement <4 x i64> %vec, i64 %in, i32 %idx
  store <4 x i64> %res, ptr %dst
  ret void
}

; Variable-index insert of a float into <8 x float>. The expected assembly
; goes through memory: the vector is loaded into $xr1 and written to the stack
; slot at $sp+32, the slot address is combined with the index register by
; bstrins.d (bit range 4..2), and the scalar is written straight from $fa0
; with fst.s before the whole vector is reloaded and stored to %dst. Unlike
; the integer variants, the index register in the expected output is $a2.
define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_8xfloat_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi.d $sp, $sp, -96
; CHECK-NEXT:    st.d $ra, $sp, 88 # 8-byte Folded Spill
; CHECK-NEXT:    st.d $fp, $sp, 80 # 8-byte Folded Spill
; CHECK-NEXT:    addi.d $fp, $sp, 96
; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT:    xvld $xr1, $a0, 0
; CHECK-NEXT:    xvst $xr1, $sp, 32
; CHECK-NEXT:    addi.d $a0, $sp, 32
; CHECK-NEXT:    bstrins.d $a0, $a2, 4, 2
; CHECK-NEXT:    fst.s $fa0, $a0, 0
; CHECK-NEXT:    xvld $xr0, $sp, 32
; CHECK-NEXT:    xvst $xr0, $a1, 0
; CHECK-NEXT:    addi.d $sp, $fp, -96
; CHECK-NEXT:    ld.d $fp, $sp, 80 # 8-byte Folded Reload
; CHECK-NEXT:    ld.d $ra, $sp, 88 # 8-byte Folded Reload
; CHECK-NEXT:    addi.d $sp, $sp, 96
; CHECK-NEXT:    ret
  %fvec = load volatile <8 x float>, ptr %src
  %fres = insertelement <8 x float> %fvec, float %in, i32 %idx
  store <8 x float> %fres, ptr %dst
  ret void
}

; Variable-index insert of a double into <4 x double>. The expected assembly
; goes through memory: the vector is loaded into $xr1 and written to the stack
; slot at $sp+32, the slot address is combined with the index register by
; bstrins.d (bit range 4..3), and the scalar is written straight from $fa0
; with fst.d before the whole vector is reloaded and stored to %dst. Unlike
; the integer variants, the index register in the expected output is $a2.
define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_4xdouble_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi.d $sp, $sp, -96
; CHECK-NEXT:    st.d $ra, $sp, 88 # 8-byte Folded Spill
; CHECK-NEXT:    st.d $fp, $sp, 80 # 8-byte Folded Spill
; CHECK-NEXT:    addi.d $fp, $sp, 96
; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT:    xvld $xr1, $a0, 0
; CHECK-NEXT:    xvst $xr1, $sp, 32
; CHECK-NEXT:    addi.d $a0, $sp, 32
; CHECK-NEXT:    bstrins.d $a0, $a2, 4, 3
; CHECK-NEXT:    fst.d $fa0, $a0, 0
; CHECK-NEXT:    xvld $xr0, $sp, 32
; CHECK-NEXT:    xvst $xr0, $a1, 0
; CHECK-NEXT:    addi.d $sp, $fp, -96
; CHECK-NEXT:    ld.d $fp, $sp, 80 # 8-byte Folded Reload
; CHECK-NEXT:    ld.d $ra, $sp, 88 # 8-byte Folded Reload
; CHECK-NEXT:    addi.d $sp, $sp, 96
; CHECK-NEXT:    ret
  %dvec = load volatile <4 x double>, ptr %src
  %dres = insertelement <4 x double> %dvec, double %in, i32 %idx
  store <4 x double> %dres, ptr %dst
  ret void
}
