@@ -3941,6 +3941,24 @@ static bool canScaleShuffleElements(ArrayRef<int> Mask, unsigned NumDstElts) {
39413941 return scaleShuffleElements(Mask, NumDstElts, ScaledMask);
39423942}
39433943
3944+ // Helper to grow the shuffle mask for a larger value type.
3945+ // NOTE: This is different to scaleShuffleElements which is a same size type.
3946+ static void growShuffleMask(ArrayRef<int> SrcMask,
3947+ SmallVectorImpl<int> &DstMask,
3948+ unsigned SrcSizeInBits, unsigned DstSizeInBits) {
3949+ assert(DstMask.empty() && "Expected an empty shuffle mas");
3950+ assert((DstSizeInBits % SrcSizeInBits) == 0 && "Illegal shuffle scale");
3951+ unsigned Scale = DstSizeInBits / SrcSizeInBits;
3952+ unsigned NumSrcElts = SrcMask.size();
3953+ DstMask.assign(SrcMask.begin(), SrcMask.end());
3954+ for (int &M : DstMask) {
3955+ if (M < 0)
3956+ continue;
3957+ M = (M % NumSrcElts) + ((M / NumSrcElts) * Scale * NumSrcElts);
3958+ }
3959+ DstMask.append((Scale - 1) * NumSrcElts, SM_SentinelUndef);
3960+ }
3961+
39443962/// Returns true if Elt is a constant zero or a floating point constant +0.0.
39453963bool X86::isZeroNode(SDValue Elt) {
39463964 return isNullConstant(Elt) || isNullFPConstant(Elt);
@@ -40456,19 +40474,13 @@ static SDValue combineX86ShuffleChainWithExtract(
4045640474 }
4045740475
4045840476 // Bail if we fail to find a source larger than the existing root.
40459- unsigned Scale = WideSizeInBits / RootSizeInBits;
4046040477 if (WideSizeInBits <= RootSizeInBits ||
4046140478 (WideSizeInBits % RootSizeInBits) != 0)
4046240479 return SDValue();
4046340480
4046440481 // Create new mask for larger type.
40465- SmallVector<int, 64> WideMask(BaseMask);
40466- for (int &M : WideMask) {
40467- if (M < 0)
40468- continue;
40469- M = (M % NumMaskElts) + ((M / NumMaskElts) * Scale * NumMaskElts);
40470- }
40471- WideMask.append((Scale - 1) * NumMaskElts, SM_SentinelUndef);
40482+ SmallVector<int, 64> WideMask;
40483+ growShuffleMask(BaseMask, WideMask, RootSizeInBits, WideSizeInBits);
4047240484
4047340485 // Attempt to peek through inputs and adjust mask when we extract from an
4047440486 // upper subvector.
0 commit comments