; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2025 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================

; REQUIRES: regkeys
; RUN: igc_opt -S --igc-split-loads -platformpvc --regkey=LS_enableLoadSplitting=1 --regkey=LS_onlyStrided=0 --regkey=LS_ignoreSplitThreshold=1 --regkey=LS_minSplitSize_GRF=0 --regkey=LS_minSplitSize_E=0 %s | FileCheck %s --check-prefix=MINSPLIT

; External, body-less consumer functions, one per <N x iM> vector shape
; (N = 1, 2, 4, 8; M = 32, 16, 8). The test functions in this file pass every
; sub-vector they carve out of a block read to the consumer of matching type,
; so each piece has exactly one user.
declare void @fun_v1i32(<1 x i32>)
declare void @fun_v2i32(<2 x i32>)
declare void @fun_v4i32(<4 x i32>)
declare void @fun_v8i32(<8 x i32>)
declare void @fun_v1i16(<1 x i16>)
declare void @fun_v2i16(<2 x i16>)
declare void @fun_v4i16(<4 x i16>)
declare void @fun_v8i16(<8 x i16>)
declare void @fun_v1i8(<1 x i8>)
declare void @fun_v2i8(<2 x i8>)
declare void @fun_v4i8(<4 x i8>)
declare void @fun_v8i8(<8 x i8>)

; 2D block read intrinsic, declared for the three 16-element result types that
; serve as test inputs. All overloads share one operand list: an i64 followed
; by nine i32 operands, two i1 flags and a trailing i32.
; NOTE(review): judging by how the expected output in this file rewrites the
; calls, operands 5/6 (counted from 1) look like the X/Y offset, operand 7 the
; element size in bits, operand 8 the block width, operand 9 the block height
; and operand 10 the number of blocks -- confirm against the GenISA intrinsic
; definition, which is not part of this file.
declare <16 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v16i32(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32)
declare <16 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v16i16(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32)
declare <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32)

; Input: one <16 x i32> block read (operands 9 and 10, counted from 1, are
; 16 and 1) consumed as two contiguous halves, elements 0-7 and 8-15.
; Expected: the read is replaced by two v8i32 reads whose operand 9 is 8 and
; whose operand 6 is 0 and 8 respectively; no shufflevector remains and each
; new read feeds its consumer directly.
define void @i32_16x1_to_8x1(i64 %ptr) {
; MINSPLIT-LABEL: @i32_16x1_to_8x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <8 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v8i32(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 32, i32 16, i32 8, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v8i32(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 8, i32 32, i32 16, i32 8, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v8i32(<8 x i32> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v8i32(<8 x i32> [[TMP2]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v16i32(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 32, i32 16, i32 16, i32 1, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i32> %vec, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %pick.1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  call void @fun_v8i32(<8 x i32> %pick.0)
  call void @fun_v8i32(<8 x i32> %pick.1)
  ret void
}

; Input: one <16 x i32> block read (operands 9 and 10, counted from 1, are
; 16 and 1) consumed as four contiguous quarters of four elements each.
; Expected: four v4i32 reads with operand 9 equal to 4 and operand 6 stepping
; through 0, 4, 8, 12; the shufflevectors are gone.
define void @i32_16x1_to_4x1(i64 %ptr) {
; MINSPLIT-LABEL: @i32_16x1_to_4x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v4i32(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 32, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v4i32(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 4, i32 32, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v4i32(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 8, i32 32, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v4i32(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 12, i32 32, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v4i32(<4 x i32> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v4i32(<4 x i32> [[TMP2]])
; MINSPLIT-NEXT:    call void @fun_v4i32(<4 x i32> [[TMP3]])
; MINSPLIT-NEXT:    call void @fun_v4i32(<4 x i32> [[TMP4]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v16i32(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 32, i32 16, i32 16, i32 1, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i32> %vec, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %pick.1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %pick.2 = shufflevector <16 x i32> %vec, <16 x i32> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
  %pick.3 = shufflevector <16 x i32> %vec, <16 x i32> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
  call void @fun_v4i32(<4 x i32> %pick.0)
  call void @fun_v4i32(<4 x i32> %pick.1)
  call void @fun_v4i32(<4 x i32> %pick.2)
  call void @fun_v4i32(<4 x i32> %pick.3)
  ret void
}

; Input: one <16 x i32> block read (operands 9 and 10, counted from 1, are
; 16 and 1) consumed as eight contiguous pairs of elements.
; Expected: eight v2i32 reads with operand 9 equal to 2 and operand 6 stepping
; through 0, 2, ..., 14; the shufflevectors are gone and the consumers are
; called in the original order.
define void @i32_16x1_to_2x1(i64 %ptr) {
; MINSPLIT-LABEL: @i32_16x1_to_2x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v2i32(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 32, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v2i32(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 2, i32 32, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v2i32(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 4, i32 32, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP4:%.*]] = call <2 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v2i32(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 6, i32 32, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP5:%.*]] = call <2 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v2i32(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 8, i32 32, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP6:%.*]] = call <2 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v2i32(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 10, i32 32, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v2i32(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 12, i32 32, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP8:%.*]] = call <2 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v2i32(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 14, i32 32, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v2i32(<2 x i32> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v2i32(<2 x i32> [[TMP2]])
; MINSPLIT-NEXT:    call void @fun_v2i32(<2 x i32> [[TMP3]])
; MINSPLIT-NEXT:    call void @fun_v2i32(<2 x i32> [[TMP4]])
; MINSPLIT-NEXT:    call void @fun_v2i32(<2 x i32> [[TMP5]])
; MINSPLIT-NEXT:    call void @fun_v2i32(<2 x i32> [[TMP6]])
; MINSPLIT-NEXT:    call void @fun_v2i32(<2 x i32> [[TMP7]])
; MINSPLIT-NEXT:    call void @fun_v2i32(<2 x i32> [[TMP8]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v16i32(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 32, i32 16, i32 16, i32 1, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i32> %vec, <16 x i32> undef, <2 x i32> <i32 0, i32 1>
  %pick.1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <2 x i32> <i32 2, i32 3>
  %pick.2 = shufflevector <16 x i32> %vec, <16 x i32> undef, <2 x i32> <i32 4, i32 5>
  %pick.3 = shufflevector <16 x i32> %vec, <16 x i32> undef, <2 x i32> <i32 6, i32 7>
  %pick.4 = shufflevector <16 x i32> %vec, <16 x i32> undef, <2 x i32> <i32 8, i32 9>
  %pick.5 = shufflevector <16 x i32> %vec, <16 x i32> undef, <2 x i32> <i32 10, i32 11>
  %pick.6 = shufflevector <16 x i32> %vec, <16 x i32> undef, <2 x i32> <i32 12, i32 13>
  %pick.7 = shufflevector <16 x i32> %vec, <16 x i32> undef, <2 x i32> <i32 14, i32 15>
  call void @fun_v2i32(<2 x i32> %pick.0)
  call void @fun_v2i32(<2 x i32> %pick.1)
  call void @fun_v2i32(<2 x i32> %pick.2)
  call void @fun_v2i32(<2 x i32> %pick.3)
  call void @fun_v2i32(<2 x i32> %pick.4)
  call void @fun_v2i32(<2 x i32> %pick.5)
  call void @fun_v2i32(<2 x i32> %pick.6)
  call void @fun_v2i32(<2 x i32> %pick.7)
  ret void
}

; Input: one <16 x i32> block read (operands 9 and 10, counted from 1, are
; 16 and 1) consumed as sixteen single-element <1 x i32> vectors.
; Expected: the function comes out unsplit -- the original v16i32 read and all
; sixteen shufflevectors are still present. The only textual difference is
; that the all-zero mask of the first shuffle is printed as zeroinitializer.
; NOTE(review): the reason the pass leaves this case alone is not visible in
; this file; presumably a single i32 element is below what it is willing to
; load on its own -- confirm against the pass implementation.
define void @i32_16x1_to_1x1(i64 %ptr) {
; MINSPLIT-LABEL: @i32_16x1_to_1x1(
; MINSPLIT-NEXT:    [[VEC:%.*]] = call <16 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v16i32(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 32, i32 16, i32 16, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[PICK_0:%.*]] = shufflevector <16 x i32> [[VEC]], <16 x i32> undef, <1 x i32> zeroinitializer
; MINSPLIT-NEXT:    [[PICK_1:%.*]] = shufflevector <16 x i32> [[VEC]], <16 x i32> undef, <1 x i32> <i32 1>
; MINSPLIT-NEXT:    [[PICK_2:%.*]] = shufflevector <16 x i32> [[VEC]], <16 x i32> undef, <1 x i32> <i32 2>
; MINSPLIT-NEXT:    [[PICK_3:%.*]] = shufflevector <16 x i32> [[VEC]], <16 x i32> undef, <1 x i32> <i32 3>
; MINSPLIT-NEXT:    [[PICK_4:%.*]] = shufflevector <16 x i32> [[VEC]], <16 x i32> undef, <1 x i32> <i32 4>
; MINSPLIT-NEXT:    [[PICK_5:%.*]] = shufflevector <16 x i32> [[VEC]], <16 x i32> undef, <1 x i32> <i32 5>
; MINSPLIT-NEXT:    [[PICK_6:%.*]] = shufflevector <16 x i32> [[VEC]], <16 x i32> undef, <1 x i32> <i32 6>
; MINSPLIT-NEXT:    [[PICK_7:%.*]] = shufflevector <16 x i32> [[VEC]], <16 x i32> undef, <1 x i32> <i32 7>
; MINSPLIT-NEXT:    [[PICK_8:%.*]] = shufflevector <16 x i32> [[VEC]], <16 x i32> undef, <1 x i32> <i32 8>
; MINSPLIT-NEXT:    [[PICK_9:%.*]] = shufflevector <16 x i32> [[VEC]], <16 x i32> undef, <1 x i32> <i32 9>
; MINSPLIT-NEXT:    [[PICK_10:%.*]] = shufflevector <16 x i32> [[VEC]], <16 x i32> undef, <1 x i32> <i32 10>
; MINSPLIT-NEXT:    [[PICK_11:%.*]] = shufflevector <16 x i32> [[VEC]], <16 x i32> undef, <1 x i32> <i32 11>
; MINSPLIT-NEXT:    [[PICK_12:%.*]] = shufflevector <16 x i32> [[VEC]], <16 x i32> undef, <1 x i32> <i32 12>
; MINSPLIT-NEXT:    [[PICK_13:%.*]] = shufflevector <16 x i32> [[VEC]], <16 x i32> undef, <1 x i32> <i32 13>
; MINSPLIT-NEXT:    [[PICK_14:%.*]] = shufflevector <16 x i32> [[VEC]], <16 x i32> undef, <1 x i32> <i32 14>
; MINSPLIT-NEXT:    [[PICK_15:%.*]] = shufflevector <16 x i32> [[VEC]], <16 x i32> undef, <1 x i32> <i32 15>
; MINSPLIT-NEXT:    call void @fun_v1i32(<1 x i32> [[PICK_0]])
; MINSPLIT-NEXT:    call void @fun_v1i32(<1 x i32> [[PICK_1]])
; MINSPLIT-NEXT:    call void @fun_v1i32(<1 x i32> [[PICK_2]])
; MINSPLIT-NEXT:    call void @fun_v1i32(<1 x i32> [[PICK_3]])
; MINSPLIT-NEXT:    call void @fun_v1i32(<1 x i32> [[PICK_4]])
; MINSPLIT-NEXT:    call void @fun_v1i32(<1 x i32> [[PICK_5]])
; MINSPLIT-NEXT:    call void @fun_v1i32(<1 x i32> [[PICK_6]])
; MINSPLIT-NEXT:    call void @fun_v1i32(<1 x i32> [[PICK_7]])
; MINSPLIT-NEXT:    call void @fun_v1i32(<1 x i32> [[PICK_8]])
; MINSPLIT-NEXT:    call void @fun_v1i32(<1 x i32> [[PICK_9]])
; MINSPLIT-NEXT:    call void @fun_v1i32(<1 x i32> [[PICK_10]])
; MINSPLIT-NEXT:    call void @fun_v1i32(<1 x i32> [[PICK_11]])
; MINSPLIT-NEXT:    call void @fun_v1i32(<1 x i32> [[PICK_12]])
; MINSPLIT-NEXT:    call void @fun_v1i32(<1 x i32> [[PICK_13]])
; MINSPLIT-NEXT:    call void @fun_v1i32(<1 x i32> [[PICK_14]])
; MINSPLIT-NEXT:    call void @fun_v1i32(<1 x i32> [[PICK_15]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v16i32(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 32, i32 16, i32 16, i32 1, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i32> %vec, <16 x i32> undef, <1 x i32> <i32 0>
  %pick.1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <1 x i32> <i32 1>
  %pick.2 = shufflevector <16 x i32> %vec, <16 x i32> undef, <1 x i32> <i32 2>
  %pick.3 = shufflevector <16 x i32> %vec, <16 x i32> undef, <1 x i32> <i32 3>
  %pick.4 = shufflevector <16 x i32> %vec, <16 x i32> undef, <1 x i32> <i32 4>
  %pick.5 = shufflevector <16 x i32> %vec, <16 x i32> undef, <1 x i32> <i32 5>
  %pick.6 = shufflevector <16 x i32> %vec, <16 x i32> undef, <1 x i32> <i32 6>
  %pick.7 = shufflevector <16 x i32> %vec, <16 x i32> undef, <1 x i32> <i32 7>
  %pick.8 = shufflevector <16 x i32> %vec, <16 x i32> undef, <1 x i32> <i32 8>
  %pick.9 = shufflevector <16 x i32> %vec, <16 x i32> undef, <1 x i32> <i32 9>
  %pick.10 = shufflevector <16 x i32> %vec, <16 x i32> undef, <1 x i32> <i32 10>
  %pick.11 = shufflevector <16 x i32> %vec, <16 x i32> undef, <1 x i32> <i32 11>
  %pick.12 = shufflevector <16 x i32> %vec, <16 x i32> undef, <1 x i32> <i32 12>
  %pick.13 = shufflevector <16 x i32> %vec, <16 x i32> undef, <1 x i32> <i32 13>
  %pick.14 = shufflevector <16 x i32> %vec, <16 x i32> undef, <1 x i32> <i32 14>
  %pick.15 = shufflevector <16 x i32> %vec, <16 x i32> undef, <1 x i32> <i32 15>
  call void @fun_v1i32(<1 x i32> %pick.0)
  call void @fun_v1i32(<1 x i32> %pick.1)
  call void @fun_v1i32(<1 x i32> %pick.2)
  call void @fun_v1i32(<1 x i32> %pick.3)
  call void @fun_v1i32(<1 x i32> %pick.4)
  call void @fun_v1i32(<1 x i32> %pick.5)
  call void @fun_v1i32(<1 x i32> %pick.6)
  call void @fun_v1i32(<1 x i32> %pick.7)
  call void @fun_v1i32(<1 x i32> %pick.8)
  call void @fun_v1i32(<1 x i32> %pick.9)
  call void @fun_v1i32(<1 x i32> %pick.10)
  call void @fun_v1i32(<1 x i32> %pick.11)
  call void @fun_v1i32(<1 x i32> %pick.12)
  call void @fun_v1i32(<1 x i32> %pick.13)
  call void @fun_v1i32(<1 x i32> %pick.14)
  call void @fun_v1i32(<1 x i32> %pick.15)
  ret void
}

; Input: one <16 x i16> block read (operand 7, counted from 1, is 16;
; operands 9 and 10 are 16 and 1) consumed as two contiguous halves,
; elements 0-7 and 8-15.
; Expected: two v8i16 reads with operand 9 equal to 8 and operand 6 equal to
; 0 and 8; the shufflevectors are gone.
define void @i16_16x1_to_8x1(i64 %ptr) {
; MINSPLIT-LABEL: @i16_16x1_to_8x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v8i16(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 8, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v8i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 8, i32 16, i32 16, i32 8, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v8i16(<8 x i16> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v8i16(<8 x i16> [[TMP2]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v16i16(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 16, i32 1, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i16> %vec, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %pick.1 = shufflevector <16 x i16> %vec, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  call void @fun_v8i16(<8 x i16> %pick.0)
  call void @fun_v8i16(<8 x i16> %pick.1)
  ret void
}

; Input: one <16 x i16> block read whose operands 9 and 10 (counted from 1)
; are 8 and 2, consumed as two contiguous halves, elements 0-7 and 8-15.
; Expected: two v8i16 reads that keep operand 9 at 8, drop operand 10 to 1,
; and differ in operand 5 (0 for the first, 16 for the second); operand 6
; stays 0. The shufflevectors are gone.
define void @i16_8x2_to_8x1(i64 %ptr) {
; MINSPLIT-LABEL: @i16_8x2_to_8x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v8i16(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 8, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v8i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 0, i32 16, i32 16, i32 8, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v8i16(<8 x i16> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v8i16(<8 x i16> [[TMP2]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v16i16(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 8, i32 2, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i16> %vec, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %pick.1 = shufflevector <16 x i16> %vec, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  call void @fun_v8i16(<8 x i16> %pick.0)
  call void @fun_v8i16(<8 x i16> %pick.1)
  ret void
}

; Input: one <16 x i16> block read whose operands 9 and 10 (counted from 1)
; are 8 and 2. Each consumer takes four elements from the first half plus the
; four at the same position in the second half: {0-3, 8-11} and {4-7, 12-15}.
; Expected: two v8i16 reads that keep operand 10 at 2, reduce operand 9 to 4,
; and use operand 6 equal to 0 and 4; the shufflevectors are gone.
define void @i16_8x2_to_4x2(i64 %ptr) {
; MINSPLIT-LABEL: @i16_8x2_to_4x2(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v8i16(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 4, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v8i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 4, i32 16, i32 16, i32 4, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v8i16(<8 x i16> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v8i16(<8 x i16> [[TMP2]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v16i16(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 8, i32 2, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i16> %vec, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  %pick.1 = shufflevector <16 x i16> %vec, <16 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  call void @fun_v8i16(<8 x i16> %pick.0)
  call void @fun_v8i16(<8 x i16> %pick.1)
  ret void
}

; Input: one <16 x i16> block read (operands 9 and 10, counted from 1, are
; 16 and 1) consumed as four contiguous quarters of four elements each.
; Expected: four v4i16 reads with operand 9 equal to 4 and operand 6 stepping
; through 0, 4, 8, 12; the shufflevectors are gone.
define void @i16_16x1_to_4x1(i64 %ptr) {
; MINSPLIT-LABEL: @i16_16x1_to_4x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <4 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v4i16(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <4 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v4i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 4, i32 16, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v4i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 8, i32 16, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP4:%.*]] = call <4 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v4i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 12, i32 16, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v4i16(<4 x i16> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v4i16(<4 x i16> [[TMP2]])
; MINSPLIT-NEXT:    call void @fun_v4i16(<4 x i16> [[TMP3]])
; MINSPLIT-NEXT:    call void @fun_v4i16(<4 x i16> [[TMP4]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v16i16(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 16, i32 1, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i16> %vec, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %pick.1 = shufflevector <16 x i16> %vec, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %pick.2 = shufflevector <16 x i16> %vec, <16 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
  %pick.3 = shufflevector <16 x i16> %vec, <16 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
  call void @fun_v4i16(<4 x i16> %pick.0)
  call void @fun_v4i16(<4 x i16> %pick.1)
  call void @fun_v4i16(<4 x i16> %pick.2)
  call void @fun_v4i16(<4 x i16> %pick.3)
  ret void
}

; Input: one <16 x i16> block read whose operands 9 and 10 (counted from 1)
; are 8 and 2, consumed as four contiguous quarters of four elements each.
; Expected: four v4i16 reads with operand 9 equal to 4 and operand 10 equal
; to 1. Their (operand 5, operand 6) pairs are (0, 0), (0, 4), (16, 0) and
; (16, 4), in that order; the shufflevectors are gone.
define void @i16_8x2_to_4x1(i64 %ptr) {
; MINSPLIT-LABEL: @i16_8x2_to_4x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <4 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v4i16(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <4 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v4i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 4, i32 16, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v4i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 0, i32 16, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP4:%.*]] = call <4 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v4i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 4, i32 16, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v4i16(<4 x i16> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v4i16(<4 x i16> [[TMP2]])
; MINSPLIT-NEXT:    call void @fun_v4i16(<4 x i16> [[TMP3]])
; MINSPLIT-NEXT:    call void @fun_v4i16(<4 x i16> [[TMP4]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v16i16(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 8, i32 2, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i16> %vec, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %pick.1 = shufflevector <16 x i16> %vec, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %pick.2 = shufflevector <16 x i16> %vec, <16 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
  %pick.3 = shufflevector <16 x i16> %vec, <16 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
  call void @fun_v4i16(<4 x i16> %pick.0)
  call void @fun_v4i16(<4 x i16> %pick.1)
  call void @fun_v4i16(<4 x i16> %pick.2)
  call void @fun_v4i16(<4 x i16> %pick.3)
  ret void
}

; Input: one <16 x i16> block read whose operands 9 and 10 (counted from 1)
; are 8 and 2. Each consumer takes a pair of elements from the first half plus
; the pair at the same position in the second half: {0,1,8,9}, {2,3,10,11},
; {4,5,12,13}, {6,7,14,15}.
; Expected: four v4i16 reads with operands 9 and 10 both equal to 2 and
; operand 6 stepping through 0, 2, 4, 6; the shufflevectors are gone.
define void @i16_8x2_to_2x2(i64 %ptr) {
; MINSPLIT-LABEL: @i16_8x2_to_2x2(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <4 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v4i16(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 2, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <4 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v4i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 2, i32 16, i32 16, i32 2, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v4i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 4, i32 16, i32 16, i32 2, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP4:%.*]] = call <4 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v4i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 6, i32 16, i32 16, i32 2, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v4i16(<4 x i16> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v4i16(<4 x i16> [[TMP2]])
; MINSPLIT-NEXT:    call void @fun_v4i16(<4 x i16> [[TMP3]])
; MINSPLIT-NEXT:    call void @fun_v4i16(<4 x i16> [[TMP4]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v16i16(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 8, i32 2, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i16> %vec, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 8, i32 9>
  %pick.1 = shufflevector <16 x i16> %vec, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 10, i32 11>
  %pick.2 = shufflevector <16 x i16> %vec, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 12, i32 13>
  %pick.3 = shufflevector <16 x i16> %vec, <16 x i16> undef, <4 x i32> <i32 6, i32 7, i32 14, i32 15>
  call void @fun_v4i16(<4 x i16> %pick.0)
  call void @fun_v4i16(<4 x i16> %pick.1)
  call void @fun_v4i16(<4 x i16> %pick.2)
  call void @fun_v4i16(<4 x i16> %pick.3)
  ret void
}

; Input: one <16 x i16> block read whose operands 9 and 10 (counted from 1)
; are 8 and 2, consumed as eight contiguous pairs of elements.
; Expected: eight v2i16 reads with operand 9 equal to 2 and operand 10 equal
; to 1. The first four use operand 5 = 0 with operand 6 = 0, 2, 4, 6; the last
; four use operand 5 = 16 with the same operand 6 sequence. The
; shufflevectors are gone and the consumers are called in the original order.
define void @i16_8x2_to_2x1(i64 %ptr) {
; MINSPLIT-LABEL: @i16_8x2_to_2x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <2 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v2i16(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v2i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 2, i32 16, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP3:%.*]] = call <2 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v2i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 4, i32 16, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP4:%.*]] = call <2 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v2i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 6, i32 16, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP5:%.*]] = call <2 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v2i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 0, i32 16, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP6:%.*]] = call <2 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v2i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 2, i32 16, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP7:%.*]] = call <2 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v2i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 4, i32 16, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP8:%.*]] = call <2 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v2i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 6, i32 16, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v2i16(<2 x i16> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v2i16(<2 x i16> [[TMP2]])
; MINSPLIT-NEXT:    call void @fun_v2i16(<2 x i16> [[TMP3]])
; MINSPLIT-NEXT:    call void @fun_v2i16(<2 x i16> [[TMP4]])
; MINSPLIT-NEXT:    call void @fun_v2i16(<2 x i16> [[TMP5]])
; MINSPLIT-NEXT:    call void @fun_v2i16(<2 x i16> [[TMP6]])
; MINSPLIT-NEXT:    call void @fun_v2i16(<2 x i16> [[TMP7]])
; MINSPLIT-NEXT:    call void @fun_v2i16(<2 x i16> [[TMP8]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v16i16(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 8, i32 2, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i16> %vec, <16 x i16> undef, <2 x i32> <i32 0, i32 1>
  %pick.1 = shufflevector <16 x i16> %vec, <16 x i16> undef, <2 x i32> <i32 2, i32 3>
  %pick.2 = shufflevector <16 x i16> %vec, <16 x i16> undef, <2 x i32> <i32 4, i32 5>
  %pick.3 = shufflevector <16 x i16> %vec, <16 x i16> undef, <2 x i32> <i32 6, i32 7>
  %pick.4 = shufflevector <16 x i16> %vec, <16 x i16> undef, <2 x i32> <i32 8, i32 9>
  %pick.5 = shufflevector <16 x i16> %vec, <16 x i16> undef, <2 x i32> <i32 10, i32 11>
  %pick.6 = shufflevector <16 x i16> %vec, <16 x i16> undef, <2 x i32> <i32 12, i32 13>
  %pick.7 = shufflevector <16 x i16> %vec, <16 x i16> undef, <2 x i32> <i32 14, i32 15>
  call void @fun_v2i16(<2 x i16> %pick.0)
  call void @fun_v2i16(<2 x i16> %pick.1)
  call void @fun_v2i16(<2 x i16> %pick.2)
  call void @fun_v2i16(<2 x i16> %pick.3)
  call void @fun_v2i16(<2 x i16> %pick.4)
  call void @fun_v2i16(<2 x i16> %pick.5)
  call void @fun_v2i16(<2 x i16> %pick.6)
  call void @fun_v2i16(<2 x i16> %pick.7)
  ret void
}

; Input: one <16 x i16> block read whose operands 9 and 10 (counted from 1)
; are 8 and 2. Each of the eight consumers takes one element from the first
; half plus the element at the same position in the second half: {k, k + 8}
; for k = 0..7.
; Expected: the read is not split down to two-element reads. Instead there are
; four v4i16 reads (operands 9 and 10 both 2, operand 6 = 0, 2, 4, 6), and
; each is followed by extractelement/insertelement chains that assemble two
; <2 x i16> values from it: lanes (0, 2) and lanes (1, 3). The eight
; assembled vectors are then passed to the consumers in the original order.
define void @i16_8x2_to_1x2(i64 %ptr) {
; MINSPLIT-LABEL: @i16_8x2_to_1x2(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <4 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v4i16(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 2, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = extractelement <4 x i16> [[TMP1]], i64 0
; MINSPLIT-NEXT:    [[TMP3:%.*]] = insertelement <2 x i16> undef, i16 [[TMP2]], i64 0
; MINSPLIT-NEXT:    [[TMP4:%.*]] = extractelement <4 x i16> [[TMP1]], i64 2
; MINSPLIT-NEXT:    [[TMP5:%.*]] = insertelement <2 x i16> [[TMP3]], i16 [[TMP4]], i64 1
; MINSPLIT-NEXT:    [[TMP6:%.*]] = extractelement <4 x i16> [[TMP1]], i64 1
; MINSPLIT-NEXT:    [[TMP7:%.*]] = insertelement <2 x i16> undef, i16 [[TMP6]], i64 0
; MINSPLIT-NEXT:    [[TMP8:%.*]] = extractelement <4 x i16> [[TMP1]], i64 3
; MINSPLIT-NEXT:    [[TMP9:%.*]] = insertelement <2 x i16> [[TMP7]], i16 [[TMP8]], i64 1
; MINSPLIT-NEXT:    [[TMP10:%.*]] = call <4 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v4i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 2, i32 16, i32 16, i32 2, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP11:%.*]] = extractelement <4 x i16> [[TMP10]], i64 0
; MINSPLIT-NEXT:    [[TMP12:%.*]] = insertelement <2 x i16> undef, i16 [[TMP11]], i64 0
; MINSPLIT-NEXT:    [[TMP13:%.*]] = extractelement <4 x i16> [[TMP10]], i64 2
; MINSPLIT-NEXT:    [[TMP14:%.*]] = insertelement <2 x i16> [[TMP12]], i16 [[TMP13]], i64 1
; MINSPLIT-NEXT:    [[TMP15:%.*]] = extractelement <4 x i16> [[TMP10]], i64 1
; MINSPLIT-NEXT:    [[TMP16:%.*]] = insertelement <2 x i16> undef, i16 [[TMP15]], i64 0
; MINSPLIT-NEXT:    [[TMP17:%.*]] = extractelement <4 x i16> [[TMP10]], i64 3
; MINSPLIT-NEXT:    [[TMP18:%.*]] = insertelement <2 x i16> [[TMP16]], i16 [[TMP17]], i64 1
; MINSPLIT-NEXT:    [[TMP19:%.*]] = call <4 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v4i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 4, i32 16, i32 16, i32 2, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP20:%.*]] = extractelement <4 x i16> [[TMP19]], i64 0
; MINSPLIT-NEXT:    [[TMP21:%.*]] = insertelement <2 x i16> undef, i16 [[TMP20]], i64 0
; MINSPLIT-NEXT:    [[TMP22:%.*]] = extractelement <4 x i16> [[TMP19]], i64 2
; MINSPLIT-NEXT:    [[TMP23:%.*]] = insertelement <2 x i16> [[TMP21]], i16 [[TMP22]], i64 1
; MINSPLIT-NEXT:    [[TMP24:%.*]] = extractelement <4 x i16> [[TMP19]], i64 1
; MINSPLIT-NEXT:    [[TMP25:%.*]] = insertelement <2 x i16> undef, i16 [[TMP24]], i64 0
; MINSPLIT-NEXT:    [[TMP26:%.*]] = extractelement <4 x i16> [[TMP19]], i64 3
; MINSPLIT-NEXT:    [[TMP27:%.*]] = insertelement <2 x i16> [[TMP25]], i16 [[TMP26]], i64 1
; MINSPLIT-NEXT:    [[TMP28:%.*]] = call <4 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v4i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 6, i32 16, i32 16, i32 2, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP29:%.*]] = extractelement <4 x i16> [[TMP28]], i64 0
; MINSPLIT-NEXT:    [[TMP30:%.*]] = insertelement <2 x i16> undef, i16 [[TMP29]], i64 0
; MINSPLIT-NEXT:    [[TMP31:%.*]] = extractelement <4 x i16> [[TMP28]], i64 2
; MINSPLIT-NEXT:    [[TMP32:%.*]] = insertelement <2 x i16> [[TMP30]], i16 [[TMP31]], i64 1
; MINSPLIT-NEXT:    [[TMP33:%.*]] = extractelement <4 x i16> [[TMP28]], i64 1
; MINSPLIT-NEXT:    [[TMP34:%.*]] = insertelement <2 x i16> undef, i16 [[TMP33]], i64 0
; MINSPLIT-NEXT:    [[TMP35:%.*]] = extractelement <4 x i16> [[TMP28]], i64 3
; MINSPLIT-NEXT:    [[TMP36:%.*]] = insertelement <2 x i16> [[TMP34]], i16 [[TMP35]], i64 1
; MINSPLIT-NEXT:    call void @fun_v2i16(<2 x i16> [[TMP5]])
; MINSPLIT-NEXT:    call void @fun_v2i16(<2 x i16> [[TMP9]])
; MINSPLIT-NEXT:    call void @fun_v2i16(<2 x i16> [[TMP14]])
; MINSPLIT-NEXT:    call void @fun_v2i16(<2 x i16> [[TMP18]])
; MINSPLIT-NEXT:    call void @fun_v2i16(<2 x i16> [[TMP23]])
; MINSPLIT-NEXT:    call void @fun_v2i16(<2 x i16> [[TMP27]])
; MINSPLIT-NEXT:    call void @fun_v2i16(<2 x i16> [[TMP32]])
; MINSPLIT-NEXT:    call void @fun_v2i16(<2 x i16> [[TMP36]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v16i16(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 8, i32 2, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i16> %vec, <16 x i16> undef, <2 x i32> <i32 0, i32 8>
  %pick.1 = shufflevector <16 x i16> %vec, <16 x i16> undef, <2 x i32> <i32 1, i32 9>
  %pick.2 = shufflevector <16 x i16> %vec, <16 x i16> undef, <2 x i32> <i32 2, i32 10>
  %pick.3 = shufflevector <16 x i16> %vec, <16 x i16> undef, <2 x i32> <i32 3, i32 11>
  %pick.4 = shufflevector <16 x i16> %vec, <16 x i16> undef, <2 x i32> <i32 4, i32 12>
  %pick.5 = shufflevector <16 x i16> %vec, <16 x i16> undef, <2 x i32> <i32 5, i32 13>
  %pick.6 = shufflevector <16 x i16> %vec, <16 x i16> undef, <2 x i32> <i32 6, i32 14>
  %pick.7 = shufflevector <16 x i16> %vec, <16 x i16> undef, <2 x i32> <i32 7, i32 15>
  call void @fun_v2i16(<2 x i16> %pick.0)
  call void @fun_v2i16(<2 x i16> %pick.1)
  call void @fun_v2i16(<2 x i16> %pick.2)
  call void @fun_v2i16(<2 x i16> %pick.3)
  call void @fun_v2i16(<2 x i16> %pick.4)
  call void @fun_v2i16(<2 x i16> %pick.5)
  call void @fun_v2i16(<2 x i16> %pick.6)
  call void @fun_v2i16(<2 x i16> %pick.7)
  ret void
}

; Test: every element of a <16 x i16> 2D block read is consumed on its own as a
; <1 x i16> value (sixteen single-index shuffles, one call per shuffle).
; Expected: the wide read is replaced by eight <2 x i16> reads, and each
; <1 x i16> user is rebuilt from one element extracted from those reads. The
; narrow reads differ only in the two i32 operands after the third one
; ((0,0), (0,2), (0,4), (0,6), (16,0), (16,2), (16,4), (16,6)) and in the
; last two i32 size operands (8, 2 in the input; 2, 1 in the output).
; NOTE(review): "8x2" / "1x1" in the name presumably mean rows-per-block x
; block-count, and the varying operand pair presumably is an (x, y) offset;
; confirm against the intrinsic definition.
define void @i16_8x2_to_1x1(i64 %ptr) {
; MINSPLIT-LABEL: @i16_8x2_to_1x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <2 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v2i16(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = extractelement <2 x i16> [[TMP1]], i64 0
; MINSPLIT-NEXT:    [[TMP3:%.*]] = insertelement <1 x i16> undef, i16 [[TMP2]], i64 0
; MINSPLIT-NEXT:    [[TMP4:%.*]] = extractelement <2 x i16> [[TMP1]], i64 1
; MINSPLIT-NEXT:    [[TMP5:%.*]] = insertelement <1 x i16> undef, i16 [[TMP4]], i64 0
; MINSPLIT-NEXT:    [[TMP6:%.*]] = call <2 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v2i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 2, i32 16, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP7:%.*]] = extractelement <2 x i16> [[TMP6]], i64 0
; MINSPLIT-NEXT:    [[TMP8:%.*]] = insertelement <1 x i16> undef, i16 [[TMP7]], i64 0
; MINSPLIT-NEXT:    [[TMP9:%.*]] = extractelement <2 x i16> [[TMP6]], i64 1
; MINSPLIT-NEXT:    [[TMP10:%.*]] = insertelement <1 x i16> undef, i16 [[TMP9]], i64 0
; MINSPLIT-NEXT:    [[TMP11:%.*]] = call <2 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v2i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 4, i32 16, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP12:%.*]] = extractelement <2 x i16> [[TMP11]], i64 0
; MINSPLIT-NEXT:    [[TMP13:%.*]] = insertelement <1 x i16> undef, i16 [[TMP12]], i64 0
; MINSPLIT-NEXT:    [[TMP14:%.*]] = extractelement <2 x i16> [[TMP11]], i64 1
; MINSPLIT-NEXT:    [[TMP15:%.*]] = insertelement <1 x i16> undef, i16 [[TMP14]], i64 0
; MINSPLIT-NEXT:    [[TMP16:%.*]] = call <2 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v2i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 6, i32 16, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP17:%.*]] = extractelement <2 x i16> [[TMP16]], i64 0
; MINSPLIT-NEXT:    [[TMP18:%.*]] = insertelement <1 x i16> undef, i16 [[TMP17]], i64 0
; MINSPLIT-NEXT:    [[TMP19:%.*]] = extractelement <2 x i16> [[TMP16]], i64 1
; MINSPLIT-NEXT:    [[TMP20:%.*]] = insertelement <1 x i16> undef, i16 [[TMP19]], i64 0
; MINSPLIT-NEXT:    [[TMP21:%.*]] = call <2 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v2i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 0, i32 16, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP22:%.*]] = extractelement <2 x i16> [[TMP21]], i64 0
; MINSPLIT-NEXT:    [[TMP23:%.*]] = insertelement <1 x i16> undef, i16 [[TMP22]], i64 0
; MINSPLIT-NEXT:    [[TMP24:%.*]] = extractelement <2 x i16> [[TMP21]], i64 1
; MINSPLIT-NEXT:    [[TMP25:%.*]] = insertelement <1 x i16> undef, i16 [[TMP24]], i64 0
; MINSPLIT-NEXT:    [[TMP26:%.*]] = call <2 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v2i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 2, i32 16, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP27:%.*]] = extractelement <2 x i16> [[TMP26]], i64 0
; MINSPLIT-NEXT:    [[TMP28:%.*]] = insertelement <1 x i16> undef, i16 [[TMP27]], i64 0
; MINSPLIT-NEXT:    [[TMP29:%.*]] = extractelement <2 x i16> [[TMP26]], i64 1
; MINSPLIT-NEXT:    [[TMP30:%.*]] = insertelement <1 x i16> undef, i16 [[TMP29]], i64 0
; MINSPLIT-NEXT:    [[TMP31:%.*]] = call <2 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v2i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 4, i32 16, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP32:%.*]] = extractelement <2 x i16> [[TMP31]], i64 0
; MINSPLIT-NEXT:    [[TMP33:%.*]] = insertelement <1 x i16> undef, i16 [[TMP32]], i64 0
; MINSPLIT-NEXT:    [[TMP34:%.*]] = extractelement <2 x i16> [[TMP31]], i64 1
; MINSPLIT-NEXT:    [[TMP35:%.*]] = insertelement <1 x i16> undef, i16 [[TMP34]], i64 0
; MINSPLIT-NEXT:    [[TMP36:%.*]] = call <2 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v2i16(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 6, i32 16, i32 16, i32 2, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP37:%.*]] = extractelement <2 x i16> [[TMP36]], i64 0
; MINSPLIT-NEXT:    [[TMP38:%.*]] = insertelement <1 x i16> undef, i16 [[TMP37]], i64 0
; MINSPLIT-NEXT:    [[TMP39:%.*]] = extractelement <2 x i16> [[TMP36]], i64 1
; MINSPLIT-NEXT:    [[TMP40:%.*]] = insertelement <1 x i16> undef, i16 [[TMP39]], i64 0
; MINSPLIT-NEXT:    call void @fun_v1i16(<1 x i16> [[TMP3]])
; MINSPLIT-NEXT:    call void @fun_v1i16(<1 x i16> [[TMP5]])
; MINSPLIT-NEXT:    call void @fun_v1i16(<1 x i16> [[TMP8]])
; MINSPLIT-NEXT:    call void @fun_v1i16(<1 x i16> [[TMP10]])
; MINSPLIT-NEXT:    call void @fun_v1i16(<1 x i16> [[TMP13]])
; MINSPLIT-NEXT:    call void @fun_v1i16(<1 x i16> [[TMP15]])
; MINSPLIT-NEXT:    call void @fun_v1i16(<1 x i16> [[TMP18]])
; MINSPLIT-NEXT:    call void @fun_v1i16(<1 x i16> [[TMP20]])
; MINSPLIT-NEXT:    call void @fun_v1i16(<1 x i16> [[TMP23]])
; MINSPLIT-NEXT:    call void @fun_v1i16(<1 x i16> [[TMP25]])
; MINSPLIT-NEXT:    call void @fun_v1i16(<1 x i16> [[TMP28]])
; MINSPLIT-NEXT:    call void @fun_v1i16(<1 x i16> [[TMP30]])
; MINSPLIT-NEXT:    call void @fun_v1i16(<1 x i16> [[TMP33]])
; MINSPLIT-NEXT:    call void @fun_v1i16(<1 x i16> [[TMP35]])
; MINSPLIT-NEXT:    call void @fun_v1i16(<1 x i16> [[TMP38]])
; MINSPLIT-NEXT:    call void @fun_v1i16(<1 x i16> [[TMP40]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v16i16(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 16, i32 16, i32 8, i32 2, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i16> %vec, <16 x i16> undef, <1 x i32> <i32 0>
  %pick.1 = shufflevector <16 x i16> %vec, <16 x i16> undef, <1 x i32> <i32 1>
  %pick.2 = shufflevector <16 x i16> %vec, <16 x i16> undef, <1 x i32> <i32 2>
  %pick.3 = shufflevector <16 x i16> %vec, <16 x i16> undef, <1 x i32> <i32 3>
  %pick.4 = shufflevector <16 x i16> %vec, <16 x i16> undef, <1 x i32> <i32 4>
  %pick.5 = shufflevector <16 x i16> %vec, <16 x i16> undef, <1 x i32> <i32 5>
  %pick.6 = shufflevector <16 x i16> %vec, <16 x i16> undef, <1 x i32> <i32 6>
  %pick.7 = shufflevector <16 x i16> %vec, <16 x i16> undef, <1 x i32> <i32 7>
  %pick.8 = shufflevector <16 x i16> %vec, <16 x i16> undef, <1 x i32> <i32 8>
  %pick.9 = shufflevector <16 x i16> %vec, <16 x i16> undef, <1 x i32> <i32 9>
  %pick.10 = shufflevector <16 x i16> %vec, <16 x i16> undef, <1 x i32> <i32 10>
  %pick.11 = shufflevector <16 x i16> %vec, <16 x i16> undef, <1 x i32> <i32 11>
  %pick.12 = shufflevector <16 x i16> %vec, <16 x i16> undef, <1 x i32> <i32 12>
  %pick.13 = shufflevector <16 x i16> %vec, <16 x i16> undef, <1 x i32> <i32 13>
  %pick.14 = shufflevector <16 x i16> %vec, <16 x i16> undef, <1 x i32> <i32 14>
  %pick.15 = shufflevector <16 x i16> %vec, <16 x i16> undef, <1 x i32> <i32 15>
  call void @fun_v1i16(<1 x i16> %pick.0)
  call void @fun_v1i16(<1 x i16> %pick.1)
  call void @fun_v1i16(<1 x i16> %pick.2)
  call void @fun_v1i16(<1 x i16> %pick.3)
  call void @fun_v1i16(<1 x i16> %pick.4)
  call void @fun_v1i16(<1 x i16> %pick.5)
  call void @fun_v1i16(<1 x i16> %pick.6)
  call void @fun_v1i16(<1 x i16> %pick.7)
  call void @fun_v1i16(<1 x i16> %pick.8)
  call void @fun_v1i16(<1 x i16> %pick.9)
  call void @fun_v1i16(<1 x i16> %pick.10)
  call void @fun_v1i16(<1 x i16> %pick.11)
  call void @fun_v1i16(<1 x i16> %pick.12)
  call void @fun_v1i16(<1 x i16> %pick.13)
  call void @fun_v1i16(<1 x i16> %pick.14)
  call void @fun_v1i16(<1 x i16> %pick.15)
  ret void
}

; Test: a <16 x i8> 2D block read (size operands 16, 1) is consumed as two
; contiguous halves, elements 0-7 and 8-15.
; Expected: each half becomes its own <8 x i8> read (size operands 8, 1); the
; second read differs from the first only in the fifth i32 operand (0 vs 8).
; NOTE(review): that operand presumably is the row offset of the block;
; confirm against the intrinsic definition.
define void @i8_16x1_to_8x1(i64 %ptr) {
; MINSPLIT-LABEL: @i8_16x1_to_8x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <8 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v8i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 8, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v8i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 8, i32 8, i32 16, i32 8, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v8i8(<8 x i8> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v8i8(<8 x i8> [[TMP2]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 16, i32 1, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  call void @fun_v8i8(<8 x i8> %pick.0)
  call void @fun_v8i8(<8 x i8> %pick.1)
  ret void
}

; Test: a <16 x i8> 2D block read (size operands 8, 2) is consumed as two
; contiguous halves, elements 0-7 and 8-15.
; Expected: each half becomes its own <8 x i8> read (size operands 8, 1); the
; second read differs from the first only in the fourth i32 operand (0 vs 16).
; NOTE(review): that operand presumably is the column offset, advanced by one
; block width; confirm against the intrinsic definition.
define void @i8_8x2_to_8x1(i64 %ptr) {
; MINSPLIT-LABEL: @i8_8x2_to_8x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <8 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v8i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 8, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v8i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 0, i32 8, i32 16, i32 8, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v8i8(<8 x i8> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v8i8(<8 x i8> [[TMP2]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 8, i32 2, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  call void @fun_v8i8(<8 x i8> %pick.0)
  call void @fun_v8i8(<8 x i8> %pick.1)
  ret void
}

; Test: a <16 x i8> 2D block read (size operands 8, 2) is consumed as two
; <8 x i8> values, each taking four elements from the low half and the
; matching four from the high half (0-3 with 8-11, 4-7 with 12-15).
; Expected: each value becomes its own <8 x i8> read (size operands 4, 2); the
; second read differs from the first only in the fifth i32 operand (0 vs 4).
; NOTE(review): presumably this is a split along rows that keeps both blocks;
; confirm against the intrinsic definition.
define void @i8_8x2_to_4x2(i64 %ptr) {
; MINSPLIT-LABEL: @i8_8x2_to_4x2(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <8 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v8i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v8i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 4, i32 8, i32 16, i32 4, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v8i8(<8 x i8> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v8i8(<8 x i8> [[TMP2]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 8, i32 2, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  call void @fun_v8i8(<8 x i8> %pick.0)
  call void @fun_v8i8(<8 x i8> %pick.1)
  ret void
}

; Test: a <16 x i8> 2D block read (size operands 4, 4) is consumed as two
; contiguous halves, elements 0-7 and 8-15.
; Expected: each half becomes its own <8 x i8> read (size operands 4, 2); the
; second read differs from the first only in the fourth i32 operand (0 vs 32).
; NOTE(review): the input IR and expected output here are identical to
; @i8_4x4_to_4x2 apart from the function name, so this looks like a duplicate
; case; confirm whether a different pick pattern was intended.
define void @i8_4x4_to_8x1(i64 %ptr) {
; MINSPLIT-LABEL: @i8_4x4_to_8x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <8 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v8i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v8i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 32, i32 0, i32 8, i32 16, i32 4, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v8i8(<8 x i8> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v8i8(<8 x i8> [[TMP2]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 4, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  call void @fun_v8i8(<8 x i8> %pick.0)
  call void @fun_v8i8(<8 x i8> %pick.1)
  ret void
}

; Test: a <16 x i8> 2D block read (size operands 4, 4) is consumed as two
; contiguous halves, elements 0-7 and 8-15.
; Expected: each half becomes its own <8 x i8> read (size operands 4, 2); the
; second read differs from the first only in the fourth i32 operand (0 vs 32).
; NOTE(review): presumably each half covers two adjacent 4-row blocks and 32
; is two block widths; confirm against the intrinsic definition.
define void @i8_4x4_to_4x2(i64 %ptr) {
; MINSPLIT-LABEL: @i8_4x4_to_4x2(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <8 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v8i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v8i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 32, i32 0, i32 8, i32 16, i32 4, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v8i8(<8 x i8> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v8i8(<8 x i8> [[TMP2]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 4, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  call void @fun_v8i8(<8 x i8> %pick.0)
  call void @fun_v8i8(<8 x i8> %pick.1)
  ret void
}

; Negative test: a <16 x i8> 2D block read (size operands 4, 4) is consumed as
; two <8 x i8> values built from non-adjacent groups of four elements
; (0-3 with 8-11, 4-7 with 12-15).
; Expected: no split; the original read and both shuffles are left unchanged.
; NOTE(review): presumably rejected because each pick would need two blocks
; that are not neighbours, which one narrower read cannot express; confirm
; against the pass implementation.
define void @i8_4x4_to_4x2_fail(i64 %ptr) {
; MINSPLIT-LABEL: @i8_4x4_to_4x2_fail(
; MINSPLIT-NEXT:    [[VEC:%.*]] = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 4, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[PICK_0:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; MINSPLIT-NEXT:    [[PICK_1:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
; MINSPLIT-NEXT:    call void @fun_v8i8(<8 x i8> [[PICK_0]])
; MINSPLIT-NEXT:    call void @fun_v8i8(<8 x i8> [[PICK_1]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 4, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  call void @fun_v8i8(<8 x i8> %pick.0)
  call void @fun_v8i8(<8 x i8> %pick.1)
  ret void
}

; Test: a <16 x i8> 2D block read (size operands 4, 4) is consumed as two
; <8 x i8> values that each take two elements out of every group of four
; (0,1,4,5,8,9,12,13 and 2,3,6,7,10,11,14,15).
; Expected: no split; the original read and both shuffles are left unchanged.
; NOTE(review): presumably a 2-row i8 read is below what the pass will emit
; (the other i8 cases in this file never go below a size operand of 4), so
; nothing narrower than the original is available; confirm against the pass.
define void @i8_4x4_to_2x4(i64 %ptr) {
; MINSPLIT-LABEL: @i8_4x4_to_2x4(
; MINSPLIT-NEXT:    [[VEC:%.*]] = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 4, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[PICK_0:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
; MINSPLIT-NEXT:    [[PICK_1:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 10, i32 11, i32 14, i32 15>
; MINSPLIT-NEXT:    call void @fun_v8i8(<8 x i8> [[PICK_0]])
; MINSPLIT-NEXT:    call void @fun_v8i8(<8 x i8> [[PICK_1]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 4, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 10, i32 11, i32 14, i32 15>
  call void @fun_v8i8(<8 x i8> %pick.0)
  call void @fun_v8i8(<8 x i8> %pick.1)
  ret void
}

; Test: a <16 x i8> 2D block read (size operands 16, 1) is consumed as four
; contiguous quarters of four elements each.
; Expected: each quarter becomes its own <4 x i8> read (size operands 4, 1);
; the reads differ only in the fifth i32 operand (0, 4, 8, 12).
; NOTE(review): that operand presumably is the row offset of the block;
; confirm against the intrinsic definition.
define void @i8_16x1_to_4x1(i64 %ptr) {
; MINSPLIT-LABEL: @i8_16x1_to_4x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 4, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP3:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 8, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP4:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 12, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP2]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP3]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP4]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 16, i32 1, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %pick.2 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
  %pick.3 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
  call void @fun_v4i8(<4 x i8> %pick.0)
  call void @fun_v4i8(<4 x i8> %pick.1)
  call void @fun_v4i8(<4 x i8> %pick.2)
  call void @fun_v4i8(<4 x i8> %pick.3)
  ret void
}

; Test: a <16 x i8> 2D block read (size operands 8, 2) is consumed as four
; contiguous quarters of four elements each.
; Expected: each quarter becomes its own <4 x i8> read (size operands 4, 1);
; the reads differ only in the fourth and fifth i32 operands:
; (0,0), (0,4), (16,0), (16,4).
; NOTE(review): presumably a split along both rows and blocks, with the pair
; acting as an (x, y) offset; confirm against the intrinsic definition.
define void @i8_8x2_to_4x1(i64 %ptr) {
; MINSPLIT-LABEL: @i8_8x2_to_4x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 4, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP3:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP4:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 4, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP2]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP3]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP4]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 8, i32 2, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %pick.2 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
  %pick.3 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
  call void @fun_v4i8(<4 x i8> %pick.0)
  call void @fun_v4i8(<4 x i8> %pick.1)
  call void @fun_v4i8(<4 x i8> %pick.2)
  call void @fun_v4i8(<4 x i8> %pick.3)
  ret void
}

; Test: a <16 x i8> 2D block read (size operands 8, 2) is consumed as four
; <4 x i8> values, each pairing two elements from the low half with the
; matching two from the high half (0,1,8,9 / 2,3,10,11 / 4,5,12,13 /
; 6,7,14,15).
; Expected: two <8 x i8> reads (size operands 4, 2; fifth i32 operand 0 and
; 4). Each read feeds two <4 x i8> values rebuilt element by element, from
; its elements 0,1,4,5 and 2,3,6,7 respectively.
; NOTE(review): presumably the read is not narrowed to 2 rows because 4 rows
; is the smallest i8 read the pass emits, so the remainder is done with
; extract/insert; confirm against the pass implementation.
define void @i8_8x2_to_2x2(i64 %ptr) {
; MINSPLIT-LABEL: @i8_8x2_to_2x2(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <8 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v8i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = extractelement <8 x i8> [[TMP1]], i64 0
; MINSPLIT-NEXT:    [[TMP3:%.*]] = insertelement <4 x i8> undef, i8 [[TMP2]], i64 0
; MINSPLIT-NEXT:    [[TMP4:%.*]] = extractelement <8 x i8> [[TMP1]], i64 1
; MINSPLIT-NEXT:    [[TMP5:%.*]] = insertelement <4 x i8> [[TMP3]], i8 [[TMP4]], i64 1
; MINSPLIT-NEXT:    [[TMP6:%.*]] = extractelement <8 x i8> [[TMP1]], i64 4
; MINSPLIT-NEXT:    [[TMP7:%.*]] = insertelement <4 x i8> [[TMP5]], i8 [[TMP6]], i64 2
; MINSPLIT-NEXT:    [[TMP8:%.*]] = extractelement <8 x i8> [[TMP1]], i64 5
; MINSPLIT-NEXT:    [[TMP9:%.*]] = insertelement <4 x i8> [[TMP7]], i8 [[TMP8]], i64 3
; MINSPLIT-NEXT:    [[TMP10:%.*]] = extractelement <8 x i8> [[TMP1]], i64 2
; MINSPLIT-NEXT:    [[TMP11:%.*]] = insertelement <4 x i8> undef, i8 [[TMP10]], i64 0
; MINSPLIT-NEXT:    [[TMP12:%.*]] = extractelement <8 x i8> [[TMP1]], i64 3
; MINSPLIT-NEXT:    [[TMP13:%.*]] = insertelement <4 x i8> [[TMP11]], i8 [[TMP12]], i64 1
; MINSPLIT-NEXT:    [[TMP14:%.*]] = extractelement <8 x i8> [[TMP1]], i64 6
; MINSPLIT-NEXT:    [[TMP15:%.*]] = insertelement <4 x i8> [[TMP13]], i8 [[TMP14]], i64 2
; MINSPLIT-NEXT:    [[TMP16:%.*]] = extractelement <8 x i8> [[TMP1]], i64 7
; MINSPLIT-NEXT:    [[TMP17:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP16]], i64 3
; MINSPLIT-NEXT:    [[TMP18:%.*]] = call <8 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v8i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 4, i32 8, i32 16, i32 4, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP19:%.*]] = extractelement <8 x i8> [[TMP18]], i64 0
; MINSPLIT-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> undef, i8 [[TMP19]], i64 0
; MINSPLIT-NEXT:    [[TMP21:%.*]] = extractelement <8 x i8> [[TMP18]], i64 1
; MINSPLIT-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP21]], i64 1
; MINSPLIT-NEXT:    [[TMP23:%.*]] = extractelement <8 x i8> [[TMP18]], i64 4
; MINSPLIT-NEXT:    [[TMP24:%.*]] = insertelement <4 x i8> [[TMP22]], i8 [[TMP23]], i64 2
; MINSPLIT-NEXT:    [[TMP25:%.*]] = extractelement <8 x i8> [[TMP18]], i64 5
; MINSPLIT-NEXT:    [[TMP26:%.*]] = insertelement <4 x i8> [[TMP24]], i8 [[TMP25]], i64 3
; MINSPLIT-NEXT:    [[TMP27:%.*]] = extractelement <8 x i8> [[TMP18]], i64 2
; MINSPLIT-NEXT:    [[TMP28:%.*]] = insertelement <4 x i8> undef, i8 [[TMP27]], i64 0
; MINSPLIT-NEXT:    [[TMP29:%.*]] = extractelement <8 x i8> [[TMP18]], i64 3
; MINSPLIT-NEXT:    [[TMP30:%.*]] = insertelement <4 x i8> [[TMP28]], i8 [[TMP29]], i64 1
; MINSPLIT-NEXT:    [[TMP31:%.*]] = extractelement <8 x i8> [[TMP18]], i64 6
; MINSPLIT-NEXT:    [[TMP32:%.*]] = insertelement <4 x i8> [[TMP30]], i8 [[TMP31]], i64 2
; MINSPLIT-NEXT:    [[TMP33:%.*]] = extractelement <8 x i8> [[TMP18]], i64 7
; MINSPLIT-NEXT:    [[TMP34:%.*]] = insertelement <4 x i8> [[TMP32]], i8 [[TMP33]], i64 3
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP9]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP17]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP26]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP34]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 8, i32 2, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 8, i32 9>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 2, i32 3, i32 10, i32 11>
  %pick.2 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 12, i32 13>
  %pick.3 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 6, i32 7, i32 14, i32 15>
  call void @fun_v4i8(<4 x i8> %pick.0)
  call void @fun_v4i8(<4 x i8> %pick.1)
  call void @fun_v4i8(<4 x i8> %pick.2)
  call void @fun_v4i8(<4 x i8> %pick.3)
  ret void
}

; Test: a <16 x i8> 2D block read (size operands 4, 4) is consumed as four
; contiguous quarters of four elements each.
; Expected: each quarter becomes its own <4 x i8> read (size operands 4, 1);
; the reads differ only in the fourth i32 operand (0, 16, 32, 48).
; NOTE(review): presumably one read per block, stepping the column offset by
; the block width of 16; confirm against the intrinsic definition.
define void @i8_4x4_to_4x1(i64 %ptr) {
; MINSPLIT-LABEL: @i8_4x4_to_4x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP3:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 32, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP4:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 48, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP1]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP2]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP3]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP4]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 4, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %pick.2 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
  %pick.3 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
  call void @fun_v4i8(<4 x i8> %pick.0)
  call void @fun_v4i8(<4 x i8> %pick.1)
  call void @fun_v4i8(<4 x i8> %pick.2)
  call void @fun_v4i8(<4 x i8> %pick.3)
  ret void
}

; Test: a <16 x i8> 2D block read (size operands 4, 4) is consumed as four
; <4 x i8> values, each taking two elements from each of two neighbouring
; groups of four (0,1,4,5 / 2,3,6,7 / 8,9,12,13 / 10,11,14,15).
; Expected: two <8 x i8> reads (size operands 4, 2; fourth i32 operand 0 and
; 32). Each read feeds two <4 x i8> values rebuilt element by element, from
; its elements 0,1,4,5 and 2,3,6,7 respectively.
; NOTE(review): presumably only the block count is narrowed by the read and
; the 2-row selection is done with extract/insert; confirm against the pass.
define void @i8_4x4_to_2x2(i64 %ptr) {
; MINSPLIT-LABEL: @i8_4x4_to_2x2(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <8 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v8i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = extractelement <8 x i8> [[TMP1]], i64 0
; MINSPLIT-NEXT:    [[TMP3:%.*]] = insertelement <4 x i8> undef, i8 [[TMP2]], i64 0
; MINSPLIT-NEXT:    [[TMP4:%.*]] = extractelement <8 x i8> [[TMP1]], i64 1
; MINSPLIT-NEXT:    [[TMP5:%.*]] = insertelement <4 x i8> [[TMP3]], i8 [[TMP4]], i64 1
; MINSPLIT-NEXT:    [[TMP6:%.*]] = extractelement <8 x i8> [[TMP1]], i64 4
; MINSPLIT-NEXT:    [[TMP7:%.*]] = insertelement <4 x i8> [[TMP5]], i8 [[TMP6]], i64 2
; MINSPLIT-NEXT:    [[TMP8:%.*]] = extractelement <8 x i8> [[TMP1]], i64 5
; MINSPLIT-NEXT:    [[TMP9:%.*]] = insertelement <4 x i8> [[TMP7]], i8 [[TMP8]], i64 3
; MINSPLIT-NEXT:    [[TMP10:%.*]] = extractelement <8 x i8> [[TMP1]], i64 2
; MINSPLIT-NEXT:    [[TMP11:%.*]] = insertelement <4 x i8> undef, i8 [[TMP10]], i64 0
; MINSPLIT-NEXT:    [[TMP12:%.*]] = extractelement <8 x i8> [[TMP1]], i64 3
; MINSPLIT-NEXT:    [[TMP13:%.*]] = insertelement <4 x i8> [[TMP11]], i8 [[TMP12]], i64 1
; MINSPLIT-NEXT:    [[TMP14:%.*]] = extractelement <8 x i8> [[TMP1]], i64 6
; MINSPLIT-NEXT:    [[TMP15:%.*]] = insertelement <4 x i8> [[TMP13]], i8 [[TMP14]], i64 2
; MINSPLIT-NEXT:    [[TMP16:%.*]] = extractelement <8 x i8> [[TMP1]], i64 7
; MINSPLIT-NEXT:    [[TMP17:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP16]], i64 3
; MINSPLIT-NEXT:    [[TMP18:%.*]] = call <8 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v8i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 32, i32 0, i32 8, i32 16, i32 4, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP19:%.*]] = extractelement <8 x i8> [[TMP18]], i64 0
; MINSPLIT-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> undef, i8 [[TMP19]], i64 0
; MINSPLIT-NEXT:    [[TMP21:%.*]] = extractelement <8 x i8> [[TMP18]], i64 1
; MINSPLIT-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP21]], i64 1
; MINSPLIT-NEXT:    [[TMP23:%.*]] = extractelement <8 x i8> [[TMP18]], i64 4
; MINSPLIT-NEXT:    [[TMP24:%.*]] = insertelement <4 x i8> [[TMP22]], i8 [[TMP23]], i64 2
; MINSPLIT-NEXT:    [[TMP25:%.*]] = extractelement <8 x i8> [[TMP18]], i64 5
; MINSPLIT-NEXT:    [[TMP26:%.*]] = insertelement <4 x i8> [[TMP24]], i8 [[TMP25]], i64 3
; MINSPLIT-NEXT:    [[TMP27:%.*]] = extractelement <8 x i8> [[TMP18]], i64 2
; MINSPLIT-NEXT:    [[TMP28:%.*]] = insertelement <4 x i8> undef, i8 [[TMP27]], i64 0
; MINSPLIT-NEXT:    [[TMP29:%.*]] = extractelement <8 x i8> [[TMP18]], i64 3
; MINSPLIT-NEXT:    [[TMP30:%.*]] = insertelement <4 x i8> [[TMP28]], i8 [[TMP29]], i64 1
; MINSPLIT-NEXT:    [[TMP31:%.*]] = extractelement <8 x i8> [[TMP18]], i64 6
; MINSPLIT-NEXT:    [[TMP32:%.*]] = insertelement <4 x i8> [[TMP30]], i8 [[TMP31]], i64 2
; MINSPLIT-NEXT:    [[TMP33:%.*]] = extractelement <8 x i8> [[TMP18]], i64 7
; MINSPLIT-NEXT:    [[TMP34:%.*]] = insertelement <4 x i8> [[TMP32]], i8 [[TMP33]], i64 3
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP9]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP17]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP26]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[TMP34]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 4, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  %pick.2 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 8, i32 9, i32 12, i32 13>
  %pick.3 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 10, i32 11, i32 14, i32 15>
  call void @fun_v4i8(<4 x i8> %pick.0)
  call void @fun_v4i8(<4 x i8> %pick.1)
  call void @fun_v4i8(<4 x i8> %pick.2)
  call void @fun_v4i8(<4 x i8> %pick.3)
  ret void
}

; Negative test: a <16 x i8> 2D block read (size operands 4, 4) is consumed as
; four <4 x i8> values that each pair two elements with the two lying eight
; positions later (0,1,8,9 / 2,3,10,11 / 4,5,12,13 / 6,7,14,15).
; Expected: no split; the original read and all four shuffles are unchanged.
; NOTE(review): presumably rejected because each pick combines two blocks
; that are not neighbours; confirm against the pass implementation.
define void @i8_4x4_to_2x2_fail(i64 %ptr) {
; MINSPLIT-LABEL: @i8_4x4_to_2x2_fail(
; MINSPLIT-NEXT:    [[VEC:%.*]] = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 4, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[PICK_0:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 8, i32 9>
; MINSPLIT-NEXT:    [[PICK_1:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <4 x i32> <i32 2, i32 3, i32 10, i32 11>
; MINSPLIT-NEXT:    [[PICK_2:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 12, i32 13>
; MINSPLIT-NEXT:    [[PICK_3:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <4 x i32> <i32 6, i32 7, i32 14, i32 15>
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[PICK_0]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[PICK_1]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[PICK_2]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[PICK_3]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 4, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 8, i32 9>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 2, i32 3, i32 10, i32 11>
  %pick.2 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 12, i32 13>
  %pick.3 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 6, i32 7, i32 14, i32 15>
  call void @fun_v4i8(<4 x i8> %pick.0)
  call void @fun_v4i8(<4 x i8> %pick.1)
  call void @fun_v4i8(<4 x i8> %pick.2)
  call void @fun_v4i8(<4 x i8> %pick.3)
  ret void
}

; Test: a <16 x i8> 2D block read (size operands 4, 4) is consumed as four
; <4 x i8> values that each take every fourth element (0,4,8,12 / 1,5,9,13 /
; 2,6,10,14 / 3,7,11,15).
; Expected: no split; the original read and all four shuffles are unchanged.
; NOTE(review): presumably each pick is a single row across all four blocks,
; which is below the smallest i8 read the pass emits; confirm against the
; pass implementation.
define void @i8_4x4_to_1x4(i64 %ptr) {
; MINSPLIT-LABEL: @i8_4x4_to_1x4(
; MINSPLIT-NEXT:    [[VEC:%.*]] = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 4, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[PICK_0:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; MINSPLIT-NEXT:    [[PICK_1:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
; MINSPLIT-NEXT:    [[PICK_2:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
; MINSPLIT-NEXT:    [[PICK_3:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[PICK_0]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[PICK_1]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[PICK_2]])
; MINSPLIT-NEXT:    call void @fun_v4i8(<4 x i8> [[PICK_3]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 4, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
  %pick.2 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %pick.3 = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
  call void @fun_v4i8(<4 x i8> %pick.0)
  call void @fun_v4i8(<4 x i8> %pick.1)
  call void @fun_v4i8(<4 x i8> %pick.2)
  call void @fun_v4i8(<4 x i8> %pick.3)
  ret void
}

; i8_8x2_to_2x1
; Input:    one <16 x i8> block read (operands 9/10, counting the pointer as
;           operand 1, are i32 8, i32 2) consumed by eight <2 x i8> shuffles
;           that pick consecutive index pairs <2k, 2k+1>.
; Expected: the read is replaced by four <4 x i8> reads whose operands 9/10
;           are i32 4, i32 1 and whose operands 5/6 are (0,0), (0,4), (16,0)
;           and (16,4). Each original <2 x i8> value is rebuilt from two
;           elements of a single new read with extractelement/insertelement,
;           and the eight calls follow in the original order.
; NOTE(review): operands 5/6 are presumably the X/Y offsets and operands 9/10
; the block height / block count -- confirm against the intrinsic definition.
define void @i8_8x2_to_2x1(i64 %ptr) {
; MINSPLIT-LABEL: @i8_8x2_to_2x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = extractelement <4 x i8> [[TMP1]], i64 0
; MINSPLIT-NEXT:    [[TMP3:%.*]] = insertelement <2 x i8> undef, i8 [[TMP2]], i64 0
; MINSPLIT-NEXT:    [[TMP4:%.*]] = extractelement <4 x i8> [[TMP1]], i64 1
; MINSPLIT-NEXT:    [[TMP5:%.*]] = insertelement <2 x i8> [[TMP3]], i8 [[TMP4]], i64 1
; MINSPLIT-NEXT:    [[TMP6:%.*]] = extractelement <4 x i8> [[TMP1]], i64 2
; MINSPLIT-NEXT:    [[TMP7:%.*]] = insertelement <2 x i8> undef, i8 [[TMP6]], i64 0
; MINSPLIT-NEXT:    [[TMP8:%.*]] = extractelement <4 x i8> [[TMP1]], i64 3
; MINSPLIT-NEXT:    [[TMP9:%.*]] = insertelement <2 x i8> [[TMP7]], i8 [[TMP8]], i64 1
; MINSPLIT-NEXT:    [[TMP10:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 4, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP11:%.*]] = extractelement <4 x i8> [[TMP10]], i64 0
; MINSPLIT-NEXT:    [[TMP12:%.*]] = insertelement <2 x i8> undef, i8 [[TMP11]], i64 0
; MINSPLIT-NEXT:    [[TMP13:%.*]] = extractelement <4 x i8> [[TMP10]], i64 1
; MINSPLIT-NEXT:    [[TMP14:%.*]] = insertelement <2 x i8> [[TMP12]], i8 [[TMP13]], i64 1
; MINSPLIT-NEXT:    [[TMP15:%.*]] = extractelement <4 x i8> [[TMP10]], i64 2
; MINSPLIT-NEXT:    [[TMP16:%.*]] = insertelement <2 x i8> undef, i8 [[TMP15]], i64 0
; MINSPLIT-NEXT:    [[TMP17:%.*]] = extractelement <4 x i8> [[TMP10]], i64 3
; MINSPLIT-NEXT:    [[TMP18:%.*]] = insertelement <2 x i8> [[TMP16]], i8 [[TMP17]], i64 1
; MINSPLIT-NEXT:    [[TMP19:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP20:%.*]] = extractelement <4 x i8> [[TMP19]], i64 0
; MINSPLIT-NEXT:    [[TMP21:%.*]] = insertelement <2 x i8> undef, i8 [[TMP20]], i64 0
; MINSPLIT-NEXT:    [[TMP22:%.*]] = extractelement <4 x i8> [[TMP19]], i64 1
; MINSPLIT-NEXT:    [[TMP23:%.*]] = insertelement <2 x i8> [[TMP21]], i8 [[TMP22]], i64 1
; MINSPLIT-NEXT:    [[TMP24:%.*]] = extractelement <4 x i8> [[TMP19]], i64 2
; MINSPLIT-NEXT:    [[TMP25:%.*]] = insertelement <2 x i8> undef, i8 [[TMP24]], i64 0
; MINSPLIT-NEXT:    [[TMP26:%.*]] = extractelement <4 x i8> [[TMP19]], i64 3
; MINSPLIT-NEXT:    [[TMP27:%.*]] = insertelement <2 x i8> [[TMP25]], i8 [[TMP26]], i64 1
; MINSPLIT-NEXT:    [[TMP28:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 4, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP29:%.*]] = extractelement <4 x i8> [[TMP28]], i64 0
; MINSPLIT-NEXT:    [[TMP30:%.*]] = insertelement <2 x i8> undef, i8 [[TMP29]], i64 0
; MINSPLIT-NEXT:    [[TMP31:%.*]] = extractelement <4 x i8> [[TMP28]], i64 1
; MINSPLIT-NEXT:    [[TMP32:%.*]] = insertelement <2 x i8> [[TMP30]], i8 [[TMP31]], i64 1
; MINSPLIT-NEXT:    [[TMP33:%.*]] = extractelement <4 x i8> [[TMP28]], i64 2
; MINSPLIT-NEXT:    [[TMP34:%.*]] = insertelement <2 x i8> undef, i8 [[TMP33]], i64 0
; MINSPLIT-NEXT:    [[TMP35:%.*]] = extractelement <4 x i8> [[TMP28]], i64 3
; MINSPLIT-NEXT:    [[TMP36:%.*]] = insertelement <2 x i8> [[TMP34]], i8 [[TMP35]], i64 1
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP5]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP9]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP14]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP18]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP23]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP27]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP32]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP36]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 8, i32 2, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 2, i32 3>
  %pick.2 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 4, i32 5>
  %pick.3 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 6, i32 7>
  %pick.4 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 8, i32 9>
  %pick.5 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 10, i32 11>
  %pick.6 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 12, i32 13>
  %pick.7 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 14, i32 15>
  call void @fun_v2i8(<2 x i8> %pick.0)
  call void @fun_v2i8(<2 x i8> %pick.1)
  call void @fun_v2i8(<2 x i8> %pick.2)
  call void @fun_v2i8(<2 x i8> %pick.3)
  call void @fun_v2i8(<2 x i8> %pick.4)
  call void @fun_v2i8(<2 x i8> %pick.5)
  call void @fun_v2i8(<2 x i8> %pick.6)
  call void @fun_v2i8(<2 x i8> %pick.7)
  ret void
}

; i8_8x2_to_1x2
; Input:    one <16 x i8> block read (operands 9/10, counting the pointer as
;           operand 1, are i32 8, i32 2) consumed by eight <2 x i8> shuffles
;           with stride-8 masks <k, k+8>.
; Expected: the read is replaced by two <8 x i8> reads whose operands 9/10
;           are i32 4, i32 2 and whose operand 6 is 0 and 4 respectively.
;           Each original <2 x i8> value is rebuilt from elements j and j+4
;           of a single new read with extractelement/insertelement, and the
;           eight calls follow in the original order.
; NOTE(review): operand 6 is presumably the Y offset and operands 9/10 the
; block height / block count -- confirm against the intrinsic definition.
define void @i8_8x2_to_1x2(i64 %ptr) {
; MINSPLIT-LABEL: @i8_8x2_to_1x2(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <8 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v8i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = extractelement <8 x i8> [[TMP1]], i64 0
; MINSPLIT-NEXT:    [[TMP3:%.*]] = insertelement <2 x i8> undef, i8 [[TMP2]], i64 0
; MINSPLIT-NEXT:    [[TMP4:%.*]] = extractelement <8 x i8> [[TMP1]], i64 4
; MINSPLIT-NEXT:    [[TMP5:%.*]] = insertelement <2 x i8> [[TMP3]], i8 [[TMP4]], i64 1
; MINSPLIT-NEXT:    [[TMP6:%.*]] = extractelement <8 x i8> [[TMP1]], i64 1
; MINSPLIT-NEXT:    [[TMP7:%.*]] = insertelement <2 x i8> undef, i8 [[TMP6]], i64 0
; MINSPLIT-NEXT:    [[TMP8:%.*]] = extractelement <8 x i8> [[TMP1]], i64 5
; MINSPLIT-NEXT:    [[TMP9:%.*]] = insertelement <2 x i8> [[TMP7]], i8 [[TMP8]], i64 1
; MINSPLIT-NEXT:    [[TMP10:%.*]] = extractelement <8 x i8> [[TMP1]], i64 2
; MINSPLIT-NEXT:    [[TMP11:%.*]] = insertelement <2 x i8> undef, i8 [[TMP10]], i64 0
; MINSPLIT-NEXT:    [[TMP12:%.*]] = extractelement <8 x i8> [[TMP1]], i64 6
; MINSPLIT-NEXT:    [[TMP13:%.*]] = insertelement <2 x i8> [[TMP11]], i8 [[TMP12]], i64 1
; MINSPLIT-NEXT:    [[TMP14:%.*]] = extractelement <8 x i8> [[TMP1]], i64 3
; MINSPLIT-NEXT:    [[TMP15:%.*]] = insertelement <2 x i8> undef, i8 [[TMP14]], i64 0
; MINSPLIT-NEXT:    [[TMP16:%.*]] = extractelement <8 x i8> [[TMP1]], i64 7
; MINSPLIT-NEXT:    [[TMP17:%.*]] = insertelement <2 x i8> [[TMP15]], i8 [[TMP16]], i64 1
; MINSPLIT-NEXT:    [[TMP18:%.*]] = call <8 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v8i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 4, i32 8, i32 16, i32 4, i32 2, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP19:%.*]] = extractelement <8 x i8> [[TMP18]], i64 0
; MINSPLIT-NEXT:    [[TMP20:%.*]] = insertelement <2 x i8> undef, i8 [[TMP19]], i64 0
; MINSPLIT-NEXT:    [[TMP21:%.*]] = extractelement <8 x i8> [[TMP18]], i64 4
; MINSPLIT-NEXT:    [[TMP22:%.*]] = insertelement <2 x i8> [[TMP20]], i8 [[TMP21]], i64 1
; MINSPLIT-NEXT:    [[TMP23:%.*]] = extractelement <8 x i8> [[TMP18]], i64 1
; MINSPLIT-NEXT:    [[TMP24:%.*]] = insertelement <2 x i8> undef, i8 [[TMP23]], i64 0
; MINSPLIT-NEXT:    [[TMP25:%.*]] = extractelement <8 x i8> [[TMP18]], i64 5
; MINSPLIT-NEXT:    [[TMP26:%.*]] = insertelement <2 x i8> [[TMP24]], i8 [[TMP25]], i64 1
; MINSPLIT-NEXT:    [[TMP27:%.*]] = extractelement <8 x i8> [[TMP18]], i64 2
; MINSPLIT-NEXT:    [[TMP28:%.*]] = insertelement <2 x i8> undef, i8 [[TMP27]], i64 0
; MINSPLIT-NEXT:    [[TMP29:%.*]] = extractelement <8 x i8> [[TMP18]], i64 6
; MINSPLIT-NEXT:    [[TMP30:%.*]] = insertelement <2 x i8> [[TMP28]], i8 [[TMP29]], i64 1
; MINSPLIT-NEXT:    [[TMP31:%.*]] = extractelement <8 x i8> [[TMP18]], i64 3
; MINSPLIT-NEXT:    [[TMP32:%.*]] = insertelement <2 x i8> undef, i8 [[TMP31]], i64 0
; MINSPLIT-NEXT:    [[TMP33:%.*]] = extractelement <8 x i8> [[TMP18]], i64 7
; MINSPLIT-NEXT:    [[TMP34:%.*]] = insertelement <2 x i8> [[TMP32]], i8 [[TMP33]], i64 1
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP5]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP9]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP13]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP17]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP22]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP26]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP30]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP34]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 8, i32 2, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 0, i32 8>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 1, i32 9>
  %pick.2 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 2, i32 10>
  %pick.3 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 3, i32 11>
  %pick.4 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 4, i32 12>
  %pick.5 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 5, i32 13>
  %pick.6 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 6, i32 14>
  %pick.7 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 7, i32 15>
  call void @fun_v2i8(<2 x i8> %pick.0)
  call void @fun_v2i8(<2 x i8> %pick.1)
  call void @fun_v2i8(<2 x i8> %pick.2)
  call void @fun_v2i8(<2 x i8> %pick.3)
  call void @fun_v2i8(<2 x i8> %pick.4)
  call void @fun_v2i8(<2 x i8> %pick.5)
  call void @fun_v2i8(<2 x i8> %pick.6)
  call void @fun_v2i8(<2 x i8> %pick.7)
  ret void
}

; i8_8x2_to_1x1
; Input:    one <16 x i8> block read (operands 9/10, counting the pointer as
;           operand 1, are i32 8, i32 2) consumed by sixteen <1 x i8>
;           shuffles, one per source index 0..15.
; Expected: the read is replaced by four <4 x i8> reads whose operands 9/10
;           are i32 4, i32 1 and whose operands 5/6 are (0,0), (0,4), (16,0)
;           and (16,4). Every <1 x i8> value is rebuilt by extracting one
;           element of a new read and inserting it into an undef <1 x i8>;
;           the sixteen calls follow in the original order.
; NOTE(review): operands 5/6 are presumably the X/Y offsets and operands 9/10
; the block height / block count -- confirm against the intrinsic definition.
define void @i8_8x2_to_1x1(i64 %ptr) {
; MINSPLIT-LABEL: @i8_8x2_to_1x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = extractelement <4 x i8> [[TMP1]], i64 0
; MINSPLIT-NEXT:    [[TMP3:%.*]] = insertelement <1 x i8> undef, i8 [[TMP2]], i64 0
; MINSPLIT-NEXT:    [[TMP4:%.*]] = extractelement <4 x i8> [[TMP1]], i64 1
; MINSPLIT-NEXT:    [[TMP5:%.*]] = insertelement <1 x i8> undef, i8 [[TMP4]], i64 0
; MINSPLIT-NEXT:    [[TMP6:%.*]] = extractelement <4 x i8> [[TMP1]], i64 2
; MINSPLIT-NEXT:    [[TMP7:%.*]] = insertelement <1 x i8> undef, i8 [[TMP6]], i64 0
; MINSPLIT-NEXT:    [[TMP8:%.*]] = extractelement <4 x i8> [[TMP1]], i64 3
; MINSPLIT-NEXT:    [[TMP9:%.*]] = insertelement <1 x i8> undef, i8 [[TMP8]], i64 0
; MINSPLIT-NEXT:    [[TMP10:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 0, i32 4, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP11:%.*]] = extractelement <4 x i8> [[TMP10]], i64 0
; MINSPLIT-NEXT:    [[TMP12:%.*]] = insertelement <1 x i8> undef, i8 [[TMP11]], i64 0
; MINSPLIT-NEXT:    [[TMP13:%.*]] = extractelement <4 x i8> [[TMP10]], i64 1
; MINSPLIT-NEXT:    [[TMP14:%.*]] = insertelement <1 x i8> undef, i8 [[TMP13]], i64 0
; MINSPLIT-NEXT:    [[TMP15:%.*]] = extractelement <4 x i8> [[TMP10]], i64 2
; MINSPLIT-NEXT:    [[TMP16:%.*]] = insertelement <1 x i8> undef, i8 [[TMP15]], i64 0
; MINSPLIT-NEXT:    [[TMP17:%.*]] = extractelement <4 x i8> [[TMP10]], i64 3
; MINSPLIT-NEXT:    [[TMP18:%.*]] = insertelement <1 x i8> undef, i8 [[TMP17]], i64 0
; MINSPLIT-NEXT:    [[TMP19:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP20:%.*]] = extractelement <4 x i8> [[TMP19]], i64 0
; MINSPLIT-NEXT:    [[TMP21:%.*]] = insertelement <1 x i8> undef, i8 [[TMP20]], i64 0
; MINSPLIT-NEXT:    [[TMP22:%.*]] = extractelement <4 x i8> [[TMP19]], i64 1
; MINSPLIT-NEXT:    [[TMP23:%.*]] = insertelement <1 x i8> undef, i8 [[TMP22]], i64 0
; MINSPLIT-NEXT:    [[TMP24:%.*]] = extractelement <4 x i8> [[TMP19]], i64 2
; MINSPLIT-NEXT:    [[TMP25:%.*]] = insertelement <1 x i8> undef, i8 [[TMP24]], i64 0
; MINSPLIT-NEXT:    [[TMP26:%.*]] = extractelement <4 x i8> [[TMP19]], i64 3
; MINSPLIT-NEXT:    [[TMP27:%.*]] = insertelement <1 x i8> undef, i8 [[TMP26]], i64 0
; MINSPLIT-NEXT:    [[TMP28:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 4, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP29:%.*]] = extractelement <4 x i8> [[TMP28]], i64 0
; MINSPLIT-NEXT:    [[TMP30:%.*]] = insertelement <1 x i8> undef, i8 [[TMP29]], i64 0
; MINSPLIT-NEXT:    [[TMP31:%.*]] = extractelement <4 x i8> [[TMP28]], i64 1
; MINSPLIT-NEXT:    [[TMP32:%.*]] = insertelement <1 x i8> undef, i8 [[TMP31]], i64 0
; MINSPLIT-NEXT:    [[TMP33:%.*]] = extractelement <4 x i8> [[TMP28]], i64 2
; MINSPLIT-NEXT:    [[TMP34:%.*]] = insertelement <1 x i8> undef, i8 [[TMP33]], i64 0
; MINSPLIT-NEXT:    [[TMP35:%.*]] = extractelement <4 x i8> [[TMP28]], i64 3
; MINSPLIT-NEXT:    [[TMP36:%.*]] = insertelement <1 x i8> undef, i8 [[TMP35]], i64 0
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP3]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP5]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP7]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP9]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP12]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP14]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP16]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP18]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP21]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP23]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP25]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP27]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP30]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP32]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP34]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP36]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 8, i32 2, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 0>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 1>
  %pick.2 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 2>
  %pick.3 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 3>
  %pick.4 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 4>
  %pick.5 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 5>
  %pick.6 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 6>
  %pick.7 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 7>
  %pick.8 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 8>
  %pick.9 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 9>
  %pick.10 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 10>
  %pick.11 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 11>
  %pick.12 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 12>
  %pick.13 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 13>
  %pick.14 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 14>
  %pick.15 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 15>
  call void @fun_v1i8(<1 x i8> %pick.0)
  call void @fun_v1i8(<1 x i8> %pick.1)
  call void @fun_v1i8(<1 x i8> %pick.2)
  call void @fun_v1i8(<1 x i8> %pick.3)
  call void @fun_v1i8(<1 x i8> %pick.4)
  call void @fun_v1i8(<1 x i8> %pick.5)
  call void @fun_v1i8(<1 x i8> %pick.6)
  call void @fun_v1i8(<1 x i8> %pick.7)
  call void @fun_v1i8(<1 x i8> %pick.8)
  call void @fun_v1i8(<1 x i8> %pick.9)
  call void @fun_v1i8(<1 x i8> %pick.10)
  call void @fun_v1i8(<1 x i8> %pick.11)
  call void @fun_v1i8(<1 x i8> %pick.12)
  call void @fun_v1i8(<1 x i8> %pick.13)
  call void @fun_v1i8(<1 x i8> %pick.14)
  call void @fun_v1i8(<1 x i8> %pick.15)
  ret void
}

; i8_4x4_to_2x1
; Input:    one <16 x i8> block read (operands 9/10, counting the pointer as
;           operand 1, are i32 4, i32 4) consumed by eight <2 x i8> shuffles
;           that pick consecutive index pairs <2k, 2k+1>.
; Expected: the read is replaced by four <4 x i8> reads whose operands 9/10
;           are i32 4, i32 1, whose operand 5 steps through 0, 16, 32, 48 and
;           whose operand 6 stays 0. Each original <2 x i8> value is rebuilt
;           from two elements of a single new read with
;           extractelement/insertelement; the eight calls keep their order.
; NOTE(review): operand 5 is presumably the X offset and operands 9/10 the
; block height / block count -- confirm against the intrinsic definition.
define void @i8_4x4_to_2x1(i64 %ptr) {
; MINSPLIT-LABEL: @i8_4x4_to_2x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = extractelement <4 x i8> [[TMP1]], i64 0
; MINSPLIT-NEXT:    [[TMP3:%.*]] = insertelement <2 x i8> undef, i8 [[TMP2]], i64 0
; MINSPLIT-NEXT:    [[TMP4:%.*]] = extractelement <4 x i8> [[TMP1]], i64 1
; MINSPLIT-NEXT:    [[TMP5:%.*]] = insertelement <2 x i8> [[TMP3]], i8 [[TMP4]], i64 1
; MINSPLIT-NEXT:    [[TMP6:%.*]] = extractelement <4 x i8> [[TMP1]], i64 2
; MINSPLIT-NEXT:    [[TMP7:%.*]] = insertelement <2 x i8> undef, i8 [[TMP6]], i64 0
; MINSPLIT-NEXT:    [[TMP8:%.*]] = extractelement <4 x i8> [[TMP1]], i64 3
; MINSPLIT-NEXT:    [[TMP9:%.*]] = insertelement <2 x i8> [[TMP7]], i8 [[TMP8]], i64 1
; MINSPLIT-NEXT:    [[TMP10:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP11:%.*]] = extractelement <4 x i8> [[TMP10]], i64 0
; MINSPLIT-NEXT:    [[TMP12:%.*]] = insertelement <2 x i8> undef, i8 [[TMP11]], i64 0
; MINSPLIT-NEXT:    [[TMP13:%.*]] = extractelement <4 x i8> [[TMP10]], i64 1
; MINSPLIT-NEXT:    [[TMP14:%.*]] = insertelement <2 x i8> [[TMP12]], i8 [[TMP13]], i64 1
; MINSPLIT-NEXT:    [[TMP15:%.*]] = extractelement <4 x i8> [[TMP10]], i64 2
; MINSPLIT-NEXT:    [[TMP16:%.*]] = insertelement <2 x i8> undef, i8 [[TMP15]], i64 0
; MINSPLIT-NEXT:    [[TMP17:%.*]] = extractelement <4 x i8> [[TMP10]], i64 3
; MINSPLIT-NEXT:    [[TMP18:%.*]] = insertelement <2 x i8> [[TMP16]], i8 [[TMP17]], i64 1
; MINSPLIT-NEXT:    [[TMP19:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 32, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP20:%.*]] = extractelement <4 x i8> [[TMP19]], i64 0
; MINSPLIT-NEXT:    [[TMP21:%.*]] = insertelement <2 x i8> undef, i8 [[TMP20]], i64 0
; MINSPLIT-NEXT:    [[TMP22:%.*]] = extractelement <4 x i8> [[TMP19]], i64 1
; MINSPLIT-NEXT:    [[TMP23:%.*]] = insertelement <2 x i8> [[TMP21]], i8 [[TMP22]], i64 1
; MINSPLIT-NEXT:    [[TMP24:%.*]] = extractelement <4 x i8> [[TMP19]], i64 2
; MINSPLIT-NEXT:    [[TMP25:%.*]] = insertelement <2 x i8> undef, i8 [[TMP24]], i64 0
; MINSPLIT-NEXT:    [[TMP26:%.*]] = extractelement <4 x i8> [[TMP19]], i64 3
; MINSPLIT-NEXT:    [[TMP27:%.*]] = insertelement <2 x i8> [[TMP25]], i8 [[TMP26]], i64 1
; MINSPLIT-NEXT:    [[TMP28:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 48, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP29:%.*]] = extractelement <4 x i8> [[TMP28]], i64 0
; MINSPLIT-NEXT:    [[TMP30:%.*]] = insertelement <2 x i8> undef, i8 [[TMP29]], i64 0
; MINSPLIT-NEXT:    [[TMP31:%.*]] = extractelement <4 x i8> [[TMP28]], i64 1
; MINSPLIT-NEXT:    [[TMP32:%.*]] = insertelement <2 x i8> [[TMP30]], i8 [[TMP31]], i64 1
; MINSPLIT-NEXT:    [[TMP33:%.*]] = extractelement <4 x i8> [[TMP28]], i64 2
; MINSPLIT-NEXT:    [[TMP34:%.*]] = insertelement <2 x i8> undef, i8 [[TMP33]], i64 0
; MINSPLIT-NEXT:    [[TMP35:%.*]] = extractelement <4 x i8> [[TMP28]], i64 3
; MINSPLIT-NEXT:    [[TMP36:%.*]] = insertelement <2 x i8> [[TMP34]], i8 [[TMP35]], i64 1
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP5]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP9]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP14]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP18]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP23]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP27]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP32]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[TMP36]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 4, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 2, i32 3>
  %pick.2 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 4, i32 5>
  %pick.3 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 6, i32 7>
  %pick.4 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 8, i32 9>
  %pick.5 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 10, i32 11>
  %pick.6 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 12, i32 13>
  %pick.7 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 14, i32 15>
  call void @fun_v2i8(<2 x i8> %pick.0)
  call void @fun_v2i8(<2 x i8> %pick.1)
  call void @fun_v2i8(<2 x i8> %pick.2)
  call void @fun_v2i8(<2 x i8> %pick.3)
  call void @fun_v2i8(<2 x i8> %pick.4)
  call void @fun_v2i8(<2 x i8> %pick.5)
  call void @fun_v2i8(<2 x i8> %pick.6)
  call void @fun_v2i8(<2 x i8> %pick.7)
  ret void
}

; i8_4x4_to_1x2
; Input:    one <16 x i8> block read (operands 9/10, counting the pointer as
;           operand 1, are i32 4, i32 4) consumed by eight <2 x i8> shuffles
;           with stride-8 masks <k, k+8>.
; Expected: the function body is matched verbatim -- same single read, same
;           eight shuffles, same calls -- i.e. no split is performed here.
; NOTE(review): the reason this access pattern is not split is not visible in
; this file -- see the pass implementation before changing the expectation.
define void @i8_4x4_to_1x2(i64 %ptr) {
; MINSPLIT-LABEL: @i8_4x4_to_1x2(
; MINSPLIT-NEXT:    [[VEC:%.*]] = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 4, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[PICK_0:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <2 x i32> <i32 0, i32 8>
; MINSPLIT-NEXT:    [[PICK_1:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <2 x i32> <i32 1, i32 9>
; MINSPLIT-NEXT:    [[PICK_2:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <2 x i32> <i32 2, i32 10>
; MINSPLIT-NEXT:    [[PICK_3:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <2 x i32> <i32 3, i32 11>
; MINSPLIT-NEXT:    [[PICK_4:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <2 x i32> <i32 4, i32 12>
; MINSPLIT-NEXT:    [[PICK_5:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <2 x i32> <i32 5, i32 13>
; MINSPLIT-NEXT:    [[PICK_6:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <2 x i32> <i32 6, i32 14>
; MINSPLIT-NEXT:    [[PICK_7:%.*]] = shufflevector <16 x i8> [[VEC]], <16 x i8> undef, <2 x i32> <i32 7, i32 15>
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[PICK_0]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[PICK_1]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[PICK_2]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[PICK_3]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[PICK_4]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[PICK_5]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[PICK_6]])
; MINSPLIT-NEXT:    call void @fun_v2i8(<2 x i8> [[PICK_7]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 4, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 0, i32 8>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 1, i32 9>
  %pick.2 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 2, i32 10>
  %pick.3 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 3, i32 11>
  %pick.4 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 4, i32 12>
  %pick.5 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 5, i32 13>
  %pick.6 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 6, i32 14>
  %pick.7 = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 7, i32 15>
  call void @fun_v2i8(<2 x i8> %pick.0)
  call void @fun_v2i8(<2 x i8> %pick.1)
  call void @fun_v2i8(<2 x i8> %pick.2)
  call void @fun_v2i8(<2 x i8> %pick.3)
  call void @fun_v2i8(<2 x i8> %pick.4)
  call void @fun_v2i8(<2 x i8> %pick.5)
  call void @fun_v2i8(<2 x i8> %pick.6)
  call void @fun_v2i8(<2 x i8> %pick.7)
  ret void
}

; i8_4x4_to_1x1
; Input:    one <16 x i8> block read (operands 9/10, counting the pointer as
;           operand 1, are i32 4, i32 4) consumed by sixteen <1 x i8>
;           shuffles, one per source index 0..15.
; Expected: the read is replaced by four <4 x i8> reads whose operands 9/10
;           are i32 4, i32 1, whose operand 5 steps through 0, 16, 32, 48 and
;           whose operand 6 stays 0. Every <1 x i8> value is rebuilt by
;           extracting one element of a new read and inserting it into an
;           undef <1 x i8>; the sixteen calls follow in the original order.
; NOTE(review): operand 5 is presumably the X offset and operands 9/10 the
; block height / block count -- confirm against the intrinsic definition.
define void @i8_4x4_to_1x1(i64 %ptr) {
; MINSPLIT-LABEL: @i8_4x4_to_1x1(
; MINSPLIT-NEXT:    [[TMP1:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR:%.*]], i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP2:%.*]] = extractelement <4 x i8> [[TMP1]], i64 0
; MINSPLIT-NEXT:    [[TMP3:%.*]] = insertelement <1 x i8> undef, i8 [[TMP2]], i64 0
; MINSPLIT-NEXT:    [[TMP4:%.*]] = extractelement <4 x i8> [[TMP1]], i64 1
; MINSPLIT-NEXT:    [[TMP5:%.*]] = insertelement <1 x i8> undef, i8 [[TMP4]], i64 0
; MINSPLIT-NEXT:    [[TMP6:%.*]] = extractelement <4 x i8> [[TMP1]], i64 2
; MINSPLIT-NEXT:    [[TMP7:%.*]] = insertelement <1 x i8> undef, i8 [[TMP6]], i64 0
; MINSPLIT-NEXT:    [[TMP8:%.*]] = extractelement <4 x i8> [[TMP1]], i64 3
; MINSPLIT-NEXT:    [[TMP9:%.*]] = insertelement <1 x i8> undef, i8 [[TMP8]], i64 0
; MINSPLIT-NEXT:    [[TMP10:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 16, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP11:%.*]] = extractelement <4 x i8> [[TMP10]], i64 0
; MINSPLIT-NEXT:    [[TMP12:%.*]] = insertelement <1 x i8> undef, i8 [[TMP11]], i64 0
; MINSPLIT-NEXT:    [[TMP13:%.*]] = extractelement <4 x i8> [[TMP10]], i64 1
; MINSPLIT-NEXT:    [[TMP14:%.*]] = insertelement <1 x i8> undef, i8 [[TMP13]], i64 0
; MINSPLIT-NEXT:    [[TMP15:%.*]] = extractelement <4 x i8> [[TMP10]], i64 2
; MINSPLIT-NEXT:    [[TMP16:%.*]] = insertelement <1 x i8> undef, i8 [[TMP15]], i64 0
; MINSPLIT-NEXT:    [[TMP17:%.*]] = extractelement <4 x i8> [[TMP10]], i64 3
; MINSPLIT-NEXT:    [[TMP18:%.*]] = insertelement <1 x i8> undef, i8 [[TMP17]], i64 0
; MINSPLIT-NEXT:    [[TMP19:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 32, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP20:%.*]] = extractelement <4 x i8> [[TMP19]], i64 0
; MINSPLIT-NEXT:    [[TMP21:%.*]] = insertelement <1 x i8> undef, i8 [[TMP20]], i64 0
; MINSPLIT-NEXT:    [[TMP22:%.*]] = extractelement <4 x i8> [[TMP19]], i64 1
; MINSPLIT-NEXT:    [[TMP23:%.*]] = insertelement <1 x i8> undef, i8 [[TMP22]], i64 0
; MINSPLIT-NEXT:    [[TMP24:%.*]] = extractelement <4 x i8> [[TMP19]], i64 2
; MINSPLIT-NEXT:    [[TMP25:%.*]] = insertelement <1 x i8> undef, i8 [[TMP24]], i64 0
; MINSPLIT-NEXT:    [[TMP26:%.*]] = extractelement <4 x i8> [[TMP19]], i64 3
; MINSPLIT-NEXT:    [[TMP27:%.*]] = insertelement <1 x i8> undef, i8 [[TMP26]], i64 0
; MINSPLIT-NEXT:    [[TMP28:%.*]] = call <4 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v4i8(i64 [[PTR]], i32 127, i32 63, i32 127, i32 48, i32 0, i32 8, i32 16, i32 4, i32 1, i1 false, i1 false, i32 0)
; MINSPLIT-NEXT:    [[TMP29:%.*]] = extractelement <4 x i8> [[TMP28]], i64 0
; MINSPLIT-NEXT:    [[TMP30:%.*]] = insertelement <1 x i8> undef, i8 [[TMP29]], i64 0
; MINSPLIT-NEXT:    [[TMP31:%.*]] = extractelement <4 x i8> [[TMP28]], i64 1
; MINSPLIT-NEXT:    [[TMP32:%.*]] = insertelement <1 x i8> undef, i8 [[TMP31]], i64 0
; MINSPLIT-NEXT:    [[TMP33:%.*]] = extractelement <4 x i8> [[TMP28]], i64 2
; MINSPLIT-NEXT:    [[TMP34:%.*]] = insertelement <1 x i8> undef, i8 [[TMP33]], i64 0
; MINSPLIT-NEXT:    [[TMP35:%.*]] = extractelement <4 x i8> [[TMP28]], i64 3
; MINSPLIT-NEXT:    [[TMP36:%.*]] = insertelement <1 x i8> undef, i8 [[TMP35]], i64 0
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP3]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP5]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP7]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP9]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP12]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP14]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP16]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP18]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP21]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP23]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP25]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP27]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP30]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP32]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP34]])
; MINSPLIT-NEXT:    call void @fun_v1i8(<1 x i8> [[TMP36]])
; MINSPLIT-NEXT:    ret void
;
  %vec = call <16 x i8> @llvm.genx.GenISA.LSC2DBlockRead.v16i8(i64 %ptr, i32 127, i32 63, i32 127, i32 0, i32 0, i32 8, i32 16, i32 4, i32 4, i1 false, i1 false, i32 0)
  %pick.0 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 0>
  %pick.1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 1>
  %pick.2 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 2>
  %pick.3 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 3>
  %pick.4 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 4>
  %pick.5 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 5>
  %pick.6 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 6>
  %pick.7 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 7>
  %pick.8 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 8>
  %pick.9 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 9>
  %pick.10 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 10>
  %pick.11 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 11>
  %pick.12 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 12>
  %pick.13 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 13>
  %pick.14 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 14>
  %pick.15 = shufflevector <16 x i8> %vec, <16 x i8> undef, <1 x i32> <i32 15>
  call void @fun_v1i8(<1 x i8> %pick.0)
  call void @fun_v1i8(<1 x i8> %pick.1)
  call void @fun_v1i8(<1 x i8> %pick.2)
  call void @fun_v1i8(<1 x i8> %pick.3)
  call void @fun_v1i8(<1 x i8> %pick.4)
  call void @fun_v1i8(<1 x i8> %pick.5)
  call void @fun_v1i8(<1 x i8> %pick.6)
  call void @fun_v1i8(<1 x i8> %pick.7)
  call void @fun_v1i8(<1 x i8> %pick.8)
  call void @fun_v1i8(<1 x i8> %pick.9)
  call void @fun_v1i8(<1 x i8> %pick.10)
  call void @fun_v1i8(<1 x i8> %pick.11)
  call void @fun_v1i8(<1 x i8> %pick.12)
  call void @fun_v1i8(<1 x i8> %pick.13)
  call void @fun_v1i8(<1 x i8> %pick.14)
  call void @fun_v1i8(<1 x i8> %pick.15)
  ret void
}
