Skip to content

Commit

Permalink
[SelectionDAG] WidenVecOp_INSERT_SUBVECTOR - Replace `INSERT_SUBVECTOR` with series of `INSERT_VECTOR_ELT` (#124420)
Browse files Browse the repository at this point in the history

If the operands to `INSERT_SUBVECTOR` can't be widened legally, just
replace the `INSERT_SUBVECTOR` with a series of `INSERT_VECTOR_ELT`.

Closes #124255 (and possibly #102016)
  • Loading branch information
abhishek-kaushik22 authored Jan 28, 2025
1 parent 606cf88 commit 015aed1
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 3 deletions.
27 changes: 24 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7040,6 +7040,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
SDValue SubVec = N->getOperand(1);
SDValue InVec = N->getOperand(0);

EVT OrigVT = SubVec.getValueType();
if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
SubVec = GetWidenedVector(SubVec);

Expand All @@ -7064,14 +7065,34 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
}
}

SDLoc DL(N);

// We need to make sure that the indices are still valid, otherwise we might
// widen what was previously well-defined to something undefined.
if (IndicesValid && InVec.isUndef() && N->getConstantOperandVal(2) == 0)
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, InVec, SubVec,
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, InVec, SubVec,
N->getOperand(2));

report_fatal_error("Don't know how to widen the operands for "
"INSERT_SUBVECTOR");
if (!IndicesValid || OrigVT.isScalableVector())
report_fatal_error(
"Don't know how to widen the operands for INSERT_SUBVECTOR");

// If the operands can't be widened legally, just replace the INSERT_SUBVECTOR
// with a series of INSERT_VECTOR_ELT
unsigned Idx = N->getConstantOperandVal(2);

SDValue InsertElt = InVec;
EVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
for (unsigned I = 0, E = OrigVT.getVectorNumElements(); I != E; ++I) {
SDValue ExtractElt =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT.getVectorElementType(),
SubVec, DAG.getConstant(I, DL, VectorIdxTy));
InsertElt =
DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InsertElt, ExtractElt,
DAG.getConstant(I + Idx, DL, VectorIdxTy));
}

return InsertElt;
}

SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
Expand Down
47 changes: 47 additions & 0 deletions llvm/test/CodeGen/X86/pr124255.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s

; Inserts the <2 x i32> subvector %b into the <4 x i32> vector %a at element
; index 0 through llvm.vector.insert. The expected x86-64 output is a single
; movsd that takes lane 0 from xmm1 and lane 1 from xmm0.
define <4 x i32> @insert_v2i32_in_v4i32_at_0(<4 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: insert_v2i32_in_v4i32_at_0:
; CHECK: # %bb.0:
; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK-NEXT: retq
%result = tail call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 0)
ret <4 x i32> %result
}

; Inserts the <2 x i32> subvector %b into the <4 x i32> vector %a at element
; index 2, i.e. a non-zero insertion index. The expected x86-64 output is a
; single movlhps that keeps lane 0 of xmm0 and places lane 0 of xmm1 above it.
define <4 x i32> @insert_v2i32_in_v4i32_at_2(<4 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: insert_v2i32_in_v4i32_at_2:
; CHECK: # %bb.0:
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
%result = tail call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 2)
ret <4 x i32> %result
}

; Floating-point counterpart of the index-0 insertion: inserts the
; <2 x float> subvector %b into the <4 x float> vector %a at element index 0.
; The expected x86-64 output is a single movsd that takes lane 0 from xmm1 and
; lane 1 from xmm0.
; The intrinsic is spelled with its canonical overload suffix (float mangles
; as f32), so the test does not depend on the IR parser remangling the name.
define <4 x float> @insert_v2f32_in_v4f32_at_0(<4 x float> %a, <2 x float> %b) {
; CHECK-LABEL: insert_v2f32_in_v4f32_at_0:
; CHECK: # %bb.0:
; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK-NEXT: retq
%result = tail call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> %a, <2 x float> %b, i64 0)
ret <4 x float> %result
}

; Inserts the <2 x i32> subvector %b into the wider <8 x i32> vector %a at
; element index 0. The expected x86-64 output is a single movsd that writes
; xmm0 from lane 0 of xmm2 and lane 1 of xmm0; no other register is modified.
; NOTE(review): presumably %a is passed split across xmm0/xmm1 and %b in xmm2
; for this triple - confirm against the calling convention.
define <8 x i32> @insert_v2i32_in_v8i32_at_0(<8 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: insert_v2i32_in_v8i32_at_0:
; CHECK: # %bb.0:
; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; CHECK-NEXT: retq
%result = tail call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %a, <2 x i32> %b, i64 0)
ret <8 x i32> %result
}

; Inserts the <2 x i32> subvector %b into the wider <8 x i32> vector %a at
; element index 6, i.e. the last two elements. The expected x86-64 output is a
; single movlhps that keeps lane 0 of xmm1 and places lane 0 of xmm2 above it;
; xmm0 is left untouched.
; NOTE(review): presumably %a is passed split across xmm0/xmm1 and %b in xmm2
; for this triple - confirm against the calling convention.
define <8 x i32> @insert_v2i32_in_v8i32_at_6(<8 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: insert_v2i32_in_v8i32_at_6:
; CHECK: # %bb.0:
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT: retq
%result = tail call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %a, <2 x i32> %b, i64 6)
ret <8 x i32> %result
}

0 comments on commit 015aed1

Please sign in to comment.