diff --git a/src/main/java/org/apache/lucene/test/Benchmark.java b/src/main/java/org/apache/lucene/test/Benchmark.java index 8c6a02c..dd40abc 100644 --- a/src/main/java/org/apache/lucene/test/Benchmark.java +++ b/src/main/java/org/apache/lucene/test/Benchmark.java @@ -31,7 +31,7 @@ public class Benchmark { private int[] ints; - private int[] intsOutput = new int[32]; + private int[] intsOutput = new int[128]; private long[] longs; final ForUtil forUtil = new ForUtil(); @@ -108,7 +108,316 @@ public int[] encode5SimdPack() throws IOException { return intsOutput; } - // TODO decode/unpack - // TODO: 6 .. 32 + @org.openjdk.jmh.annotations.Benchmark + public long[] encode6ForUtil() throws IOException { + forUtil.encode(longs, 6, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode6SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 6); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode7ForUtil() throws IOException { + forUtil.encode(longs, 7, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode7SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 7); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode8ForUtil() throws IOException { + forUtil.encode(longs, 8, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode8SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 8); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode9ForUtil() throws IOException { + forUtil.encode(longs, 9, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode9SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 9); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode10ForUtil() throws IOException { + 
forUtil.encode(longs, 10, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode10SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 10); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode11ForUtil() throws IOException { + forUtil.encode(longs, 11, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode11SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 11); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode12ForUtil() throws IOException { + forUtil.encode(longs, 12, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode12SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 12); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode13ForUtil() throws IOException { + forUtil.encode(longs, 13, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode13SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 13); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode14ForUtil() throws IOException { + forUtil.encode(longs, 14, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode14SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 14); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode15ForUtil() throws IOException { + forUtil.encode(longs, 15, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode15SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 15); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode16ForUtil() throws IOException { + forUtil.encode(longs, 16, longs); + return 
longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode16SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 16); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode17ForUtil() throws IOException { + forUtil.encode(longs, 17, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode17SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 17); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode18ForUtil() throws IOException { + forUtil.encode(longs, 18, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode18SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 18); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode19ForUtil() throws IOException { + forUtil.encode(longs, 19, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode19SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 19); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode20ForUtil() throws IOException { + forUtil.encode(longs, 20, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode20SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 20); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode21ForUtil() throws IOException { + forUtil.encode(longs, 21, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode21SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 21); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode22ForUtil() throws IOException { + forUtil.encode(longs, 22, longs); + return longs; + } + + 
@org.openjdk.jmh.annotations.Benchmark + public int[] encode22SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 22); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode23ForUtil() throws IOException { + forUtil.encode(longs, 23, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode23SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 23); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode24ForUtil() throws IOException { + forUtil.encode(longs, 24, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode24SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 24); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode25ForUtil() throws IOException { + forUtil.encode(longs, 25, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode25SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 25); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode26ForUtil() throws IOException { + forUtil.encode(longs, 26, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode26SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 26); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode27ForUtil() throws IOException { + forUtil.encode(longs, 27, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode27SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 27); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode28ForUtil() throws IOException { + forUtil.encode(longs, 28, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + 
public int[] encode28SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 28); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode29ForUtil() throws IOException { + forUtil.encode(longs, 29, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode29SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 29); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode30ForUtil() throws IOException { + forUtil.encode(longs, 30, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode30SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 30); + return intsOutput; + } + + @org.openjdk.jmh.annotations.Benchmark + public long[] encode31ForUtil() throws IOException { + forUtil.encode(longs, 31, longs); + return longs; + } + + @org.openjdk.jmh.annotations.Benchmark + public int[] encode31SimdPack() throws IOException { + SimdBitPacking.simdPack(ints, intsOutput, 31); + return intsOutput; + } } diff --git a/src/main/java/org/apache/lucene/test/SimdBitPacking.java b/src/main/java/org/apache/lucene/test/SimdBitPacking.java index eb9d832..7976bd8 100644 --- a/src/main/java/org/apache/lucene/test/SimdBitPacking.java +++ b/src/main/java/org/apache/lucene/test/SimdBitPacking.java @@ -42,7 +42,32 @@ static void simdPack(int[] input, int[] output, int bit) { case 3: SIMD_fastPack3(input, output); return; case 4: SIMD_fastPack4(input, output); return; case 5: SIMD_fastPack5(input, output); return; - // TODO 6 .. 
32 + case 6: SIMD_fastPack6(input, output); return; + case 7: SIMD_fastPack7(input, output); return; + case 8: SIMD_fastPack8(input, output); return; + case 9: SIMD_fastPack9(input, output); return; + case 10: SIMD_fastPack10(input, output); return; + case 11: SIMD_fastPack11(input, output); return; + case 12: SIMD_fastPack12(input, output); return; + case 13: SIMD_fastPack13(input, output); return; + case 14: SIMD_fastPack14(input, output); return; + case 15: SIMD_fastPack15(input, output); return; + case 16: SIMD_fastPack16(input, output); return; + case 17: SIMD_fastPack17(input, output); return; + case 18: SIMD_fastPack18(input, output); return; + case 19: SIMD_fastPack19(input, output); return; + case 20: SIMD_fastPack20(input, output); return; + case 21: SIMD_fastPack21(input, output); return; + case 22: SIMD_fastPack22(input, output); return; + case 23: SIMD_fastPack23(input, output); return; + case 24: SIMD_fastPack24(input, output); return; + case 25: SIMD_fastPack25(input, output); return; + case 26: SIMD_fastPack26(input, output); return; + case 27: SIMD_fastPack27(input, output); return; + case 28: SIMD_fastPack28(input, output); return; + case 29: SIMD_fastPack29(input, output); return; + case 30: SIMD_fastPack30(input, output); return; + case 31: SIMD_fastPack31(input, output); return; default : throw new UnsupportedOperationException(); } } @@ -55,7 +80,32 @@ static void simdUnpack(int[] input, int[] output, int bit) { case 3: SIMD_fastUnpack3(input, output); return; case 4: SIMD_fastUnpack4(input, output); return; case 5: SIMD_fastUnpack5(input, output); return; - // TODO 6 .. 
32 + case 6: SIMD_fastUnpack6(input, output); return; + case 7: SIMD_fastUnpack7(input, output); return; + case 8: SIMD_fastUnpack8(input, output); return; + case 9: SIMD_fastUnpack9(input, output); return; + case 10: SIMD_fastUnpack10(input, output); return; + case 11: SIMD_fastUnpack11(input, output); return; + case 12: SIMD_fastUnpack12(input, output); return; + case 13: SIMD_fastUnpack13(input, output); return; + case 14: SIMD_fastUnpack14(input, output); return; + case 15: SIMD_fastUnpack15(input, output); return; + case 16: SIMD_fastUnpack16(input, output); return; + case 17: SIMD_fastUnpack17(input, output); return; + case 18: SIMD_fastUnpack18(input, output); return; + case 19: SIMD_fastUnpack19(input, output); return; + case 20: SIMD_fastUnpack20(input, output); return; + case 21: SIMD_fastUnpack21(input, output); return; + case 22: SIMD_fastUnpack22(input, output); return; + case 23: SIMD_fastUnpack23(input, output); return; + case 24: SIMD_fastUnpack24(input, output); return; + case 25: SIMD_fastUnpack25(input, output); return; + case 26: SIMD_fastUnpack26(input, output); return; + case 27: SIMD_fastUnpack27(input, output); return; + case 28: SIMD_fastUnpack28(input, output); return; + case 29: SIMD_fastUnpack29(input, output); return; + case 30: SIMD_fastUnpack30(input, output); return; + case 31: SIMD_fastUnpack31(input, output); return; default : throw new UnsupportedOperationException(); } } @@ -537,6 +587,4146 @@ static void SIMD_fastPack5(int[] input, int[] output) { outVec.intoArray(output, outOff); } + // SIMD_fastpackwithoutmask6_32 + static void SIMD_fastPack6(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, 
outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask7_32 + static void SIMD_fastPack7(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 7).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 21).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 3).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 17).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 31).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 1); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec 
= inVec.lanewise(VectorOperators.LSHL, 13).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 27).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 5); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 9).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 23).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 5).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 19).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 1).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 
8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 15).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 29).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 3); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 11).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 25).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask8_32 + static void SIMD_fastPack8(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask9_32 + static void SIMD_fastPack9(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 9).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 27).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = 
inVec.lanewise(VectorOperators.LSHR, 5); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 13).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 31).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 1); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 17).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 3).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 21).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 7).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec 
= IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 25).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 7); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 11).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 29).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 3); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 15).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 1).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 19).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHL, 5).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 23).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask10_32 + static void SIMD_fastPack10(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = 
inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); 
+ inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask11_32 + static void SIMD_fastPack11(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 11).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 1).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 23).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 9); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 13).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 3).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 25).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 7); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 15).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHL, 5).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 27).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 5); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 17).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 7).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 29).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 3); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 19).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 9).or(outVec); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 31).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 1); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 21).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask12_32 + static void SIMD_fastPack12(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, 
input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask13_32 + static void SIMD_fastPack13(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, 
inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 13).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 7).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 1).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 27).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 5); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 21).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 11); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 15).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 9).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 3).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 29).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 3); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 23).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 9); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 17).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 11).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = 
inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 5).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 31).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 1); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 25).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 7); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 19).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask14_32 + static void SIMD_fastPack14(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, 
input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec 
= IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask15_32 + static void SIMD_fastPack15(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 15).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 13).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 11).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, 
inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 9).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 7).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 5).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 3).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 1).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 31).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 1); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 29).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 3); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 27).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 5); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 25).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 7); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 23).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 9); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 21).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 11); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 19).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 13); + inVec 
= IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 17).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask16_32 + static void SIMD_fastPack16(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, 
inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask17_32 + static void SIMD_fastPack17(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 17).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 15); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 19).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 13); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 21).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 11); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, 
inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 23).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 9); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 25).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 7); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 27).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 5); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 29).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 3); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 31).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 1); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 1).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, 
input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 3).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 5).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 7).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 9).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 11).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 13).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 15).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask18_32 + static void SIMD_fastPack18(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + 
outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + 
inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask19_32 + static void SIMD_fastPack19(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 19).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 13); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 25).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 7); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 31).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 1); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 5).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec 
= inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 11).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 17).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 15); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 23).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 9); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 29).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 3); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 3).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = 
inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 9).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 15).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 17); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 21).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 11); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 27).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 5); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 1).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHL, 7).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 13).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask20_32 + static void SIMD_fastPack20(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + 
+ outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask21_32 + static void SIMD_fastPack21(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 21).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 11); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 31).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 1); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 9).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 19).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 13); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 29).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 3); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 7).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 17).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 15); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 27).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 5); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 5).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + 
outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 15).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 17); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 25).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 7); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 3).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 13).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 19); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 23).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 9); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + 
outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 1).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 11).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask22_32 + static void SIMD_fastPack22(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + 
+ outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask23_32 + static void SIMD_fastPack23(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 23).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 9); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 5).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, 
input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 19).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 13); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 1).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 15).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 17); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 29).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 3); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 11).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 21); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + 
+ outVec = inVec.lanewise(VectorOperators.LSHL, 25).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 7); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 7).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 21).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 11); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 3).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 17).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 15); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHL, 31).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 1); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 13).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 19); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 27).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 5); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 9).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask24_32 + static void SIMD_fastPack24(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + 
outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = 
inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask25_32 + static void SIMD_fastPack25(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 25).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 7); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 11).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 21); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 29).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 3); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 15).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 17); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 1).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 19).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 13); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + outVec.intoArray(output, 
outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 5).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 23).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 9); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 9).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 23); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 27).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 5); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 13).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 19); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 31).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 1); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 17).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 15); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 3).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 21).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 11); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 7).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask26_32 + static void 
SIMD_fastPack26(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = 
inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + outVec.intoArray(output, outOff); + } + + // SIMD_fastpackwithoutmask27_32 + static void SIMD_fastPack27(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 27).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 5); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 17).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 15); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 7).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + 
outVec = inVec.lanewise(VectorOperators.LSHR, 25); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 29).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 3); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 19).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 13); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 9).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 23); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 31).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 1); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHL, 21).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 11); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 11).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 21); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 1).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 23).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 9); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 13).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 19); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 
8).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 3).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 25).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 7); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 15).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 17); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 5).or(outVec); + outVec.intoArray(output, outOff); + } +
    // SIMD_fastpackwithoutmask28_32
    /**
     * Bit-packs {@code input} into {@code output} at 28 bits per lane value.
     *
     * <p>Reads 32 consecutive vectors from {@code input} (offsets 0, 4, ..., 124) and writes
     * 28 vectors to {@code output} (offsets 0, 4, ..., 108). Every 8 input vectors fill exactly
     * 7 output vectors, so the work is organised as 4 identical rounds.
     *
     * <p>Values are OR-ed together without masking; presumably callers guarantee that every
     * input value fits in 28 bits (TODO confirm against the call sites).
     *
     * @param input  values to pack; offsets step by 4, which assumes a 4-lane species
     *               (TODO confirm the definition of SPECIES_128)
     * @param output destination array for the packed words
     */
    static void SIMD_fastPack28(int[] input, int[] output) {
        final int rounds = 4;
        int readPos = 0;
        int writePos = 0;

        // Partially filled output vector; starts as the very first input vector.
        IntVector pending = IntVector.fromArray(SPECIES_128, input, readPos);

        for (int round = 1; round <= rounds; round++) {
            readPos += 4;
            IntVector current = IntVector.fromArray(SPECIES_128, input, readPos);

            // Left shifts 28, 24, ..., 8: each step completes one output vector and
            // carries the bits that did not fit over into the next one.
            for (int shift = 28; shift > 4; shift -= 4) {
                pending = current.lanewise(VectorOperators.LSHL, shift).or(pending);
                pending.intoArray(output, writePos);
                writePos += 4;
                pending = current.lanewise(VectorOperators.LSHR, 32 - shift);
                readPos += 4;
                current = IntVector.fromArray(SPECIES_128, input, readPos);
            }

            // The 8th vector of the round lands in bits 4..31, completing the 7th output
            // vector with nothing left to carry.
            pending = current.lanewise(VectorOperators.LSHL, 4).or(pending);

            if (round < rounds) {
                // Load-then-store order: the next round's first vector is read before
                // the finished output vector is written.
                readPos += 4;
                current = IntVector.fromArray(SPECIES_128, input, readPos);
                pending.intoArray(output, writePos);
                writePos += 4;
                pending = current;
            } else {
                pending.intoArray(output, writePos);
            }
        }
    }
+ + // SIMD_fastpackwithoutmask29_32 + static void SIMD_fastPack29(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 29).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 3); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 23).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 9); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 17).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 15); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + +
outVec = inVec.lanewise(VectorOperators.LSHL, 11).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 21); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 5).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 27); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 31).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 1); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 25).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 7); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 19).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 13); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 
16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 13).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 19); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 7).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 25); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 1).or(outVec); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 27).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 5); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 21).or(outVec); + outVec.intoArray(output, 
outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 11); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 15).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 17); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 9).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 23); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 3).or(outVec); + outVec.intoArray(output, outOff); + } +
    // SIMD_fastpackwithoutmask30_32
    /**
     * Bit-packs {@code input} into {@code output} at 30 bits per lane value.
     *
     * <p>Reads 32 consecutive vectors from {@code input} (offsets 0, 4, ..., 124) and writes
     * 30 vectors to {@code output} (offsets 0, 4, ..., 116). Every 16 input vectors fill
     * exactly 15 output vectors, so the work is organised as 2 identical rounds.
     *
     * <p>Values are OR-ed together without masking; presumably callers guarantee that every
     * input value fits in 30 bits (TODO confirm against the call sites).
     *
     * @param input  values to pack; offsets step by 4, which assumes a 4-lane species
     *               (TODO confirm the definition of SPECIES_128)
     * @param output destination array for the packed words
     */
    static void SIMD_fastPack30(int[] input, int[] output) {
        final int rounds = 2;
        int readPos = 0;
        int writePos = 0;

        // Partially filled output vector; starts as the very first input vector.
        IntVector pending = IntVector.fromArray(SPECIES_128, input, readPos);

        for (int round = 1; round <= rounds; round++) {
            readPos += 4;
            IntVector current = IntVector.fromArray(SPECIES_128, input, readPos);

            // Left shifts 30, 28, ..., 4: each step completes one output vector and
            // carries the bits that did not fit over into the next one.
            for (int shift = 30; shift > 2; shift -= 2) {
                pending = current.lanewise(VectorOperators.LSHL, shift).or(pending);
                pending.intoArray(output, writePos);
                writePos += 4;
                pending = current.lanewise(VectorOperators.LSHR, 32 - shift);
                readPos += 4;
                current = IntVector.fromArray(SPECIES_128, input, readPos);
            }

            // The 16th vector of the round lands in bits 2..31, completing the 15th output
            // vector with nothing left to carry.
            pending = current.lanewise(VectorOperators.LSHL, 2).or(pending);

            if (round < rounds) {
                // Load-then-store order: the next round's first vector is read before
                // the finished output vector is written.
                readPos += 4;
                current = IntVector.fromArray(SPECIES_128, input, readPos);
                pending.intoArray(output, writePos);
                writePos += 4;
                pending = current;
            } else {
                pending.intoArray(output, writePos);
            }
        }
    }
+ + // SIMD_fastpackwithoutmask31_32 + static void SIMD_fastPack31(int[] input, int[] output) { + int inOff = 0; + int outOff = 0; + + IntVector outVec; + IntVector inVec = IntVector.fromArray(SPECIES_128, input, inOff); + + outVec = inVec; + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 31).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 1); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 30).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 29).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 3); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 28).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 27).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 5); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL,
26).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 25).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 7); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 24).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 23).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 9); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 22).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 21).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 11); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 20).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 19).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 13); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 18).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, 
inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 17).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 15); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 16).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 15).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 17); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 14).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 13).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 19); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 12).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 11).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 21); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 10).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 9).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = 
inVec.lanewise(VectorOperators.LSHR, 23); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 8).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 7).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 25); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 6).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 5).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 27); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 4).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 3).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 29); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 2).or(outVec); + outVec.intoArray(output, outOff); + outOff+=4; + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHL, 1).or(outVec); + outVec.intoArray(output, outOff); + } + // __SIMD_fastunpack1_32 static void SIMD_fastUnpack1(int[] input, int[] output) { IntVector inVec1 = IntVector.fromArray(SPECIES_128, input, 0); @@ -545,455 +4735,4096 @@ static void 
SIMD_fastUnpack1(int[] input, int[] output) { final int mask = 1; int shift = 0; - for (int i = 0; i < 8; i++) { - outVec1 = inVec1.lanewise(VectorOperators.LSHR, shift++).and(mask); - outVec2 = inVec2.lanewise(VectorOperators.LSHR, shift++).and(mask); - outVec3 = inVec1.lanewise(VectorOperators.LSHR, shift++).and(mask); - outVec4 = inVec2.lanewise(VectorOperators.LSHR, shift++).and(mask); - outVec1.intoArray(output, i * 16 + 0); - outVec2.intoArray(output, i * 16 + 4); - outVec3.intoArray(output, i * 16 + 8); - outVec4.intoArray(output, i * 16 + 12); - } + for (int i = 0; i < 8; i++) { + outVec1 = inVec1.lanewise(VectorOperators.LSHR, shift++).and(mask); + outVec2 = inVec2.lanewise(VectorOperators.LSHR, shift++).and(mask); + outVec3 = inVec1.lanewise(VectorOperators.LSHR, shift++).and(mask); + outVec4 = inVec2.lanewise(VectorOperators.LSHR, shift++).and(mask); + outVec1.intoArray(output, i * 16 + 0); + outVec2.intoArray(output, i * 16 + 4); + outVec3.intoArray(output, i * 16 + 8); + outVec4.intoArray(output, i * 16 + 12); + } + } +
    // __SIMD_fastunpack2_32
    /**
     * Unpacks 2-bit fields from {@code input} into {@code output}, one field per output lane.
     *
     * <p>Reads two vectors from {@code input} (offsets 0 and 4). For each of them, the sixteen
     * 2-bit fields of every lane are extracted from least to most significant, and each
     * extraction is written as one vector. In total 32 vectors are written to {@code output}
     * at offsets 0, 4, ..., 124.
     *
     * @param input  packed words; offsets step by 4, which assumes a 4-lane species
     *               (TODO confirm the definition of SPECIES_128)
     * @param output destination array for the unpacked values
     */
    static void SIMD_fastUnpack2(int[] input, int[] output) {
        final int lowTwoBits = (1 << 2) - 1;
        int writePos = 0;

        for (int readPos = 0; readPos <= 4; readPos += 4) {
            final IntVector packed = IntVector.fromArray(SPECIES_128, input, readPos);

            // The field at bit 0 needs no shift, only the mask.
            packed.and(lowTwoBits).intoArray(output, writePos);
            writePos += 4;

            // Remaining fields sit at bit offsets 2, 4, ..., 30.
            for (int shift = 2; shift <= 30; shift += 2) {
                packed.lanewise(VectorOperators.LSHR, shift)
                        .and(lowTwoBits)
                        .intoArray(output, writePos);
                writePos += 4;
            }
        }
    }
+ + // __SIMD_fastunpack3_32 + static void SIMD_fastUnpack3(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 3) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 3).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 9).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 15).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 21).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 27).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = 
outVec.or(inVec.lanewise(VectorOperators.LSHL, 3 - 1).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 1).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 7).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 13).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 19).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 25).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 31); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 3 - 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 5).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 11).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 17).and(mask); + outVec.intoArray(output, outOff+=4); + + 
outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 23).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 29).and(mask); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack4_32 + static void SIMD_fastUnpack4(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 4) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + 
outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28).and(mask); + 
outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack5_32 + static void SIMD_fastUnpack5(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 5) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 5).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 15).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 25).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5 - 3).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 3).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 13).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 23).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5 - 1).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 1).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 11).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 21).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 31); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5 - 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 9).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 19).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 29).and(mask); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5 - 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 7).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 17).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22).and(mask); + 
outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 27).and(mask); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack6_32 + static void SIMD_fastUnpack6(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 6) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 
14).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14).and(mask); + 
outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack7_32 + static void SIMD_fastUnpack7(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 7) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 7).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 21).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 3).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 17).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 31); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 1).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 13).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); 
+ outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 27); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 9).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 23).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 5).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 19).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 1).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 15).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 29); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = 
outVec.or(inVec.lanewise(VectorOperators.LSHL, 3).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 11).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 25); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack8_32 + static void SIMD_fastUnpack8(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 8) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec.intoArray(output, outOff+=4); 
+ + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack9_32 + static void SIMD_fastUnpack9(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 9) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 9).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 27); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 13).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 31); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 1).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 17).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 3).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 21).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 7).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 25); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 7).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 11).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 29); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 3).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 15).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 1).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 19).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 5).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 23); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack10_32 + static void SIMD_fastUnpack10(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 10) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + 
+ outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack11_32 + static void SIMD_fastUnpack11(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 11) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 11).and(mask); + 
outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 1).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 23); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 9).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 13).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 3).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 25); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 7).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 15).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, 
outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 5).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 27); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 17).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 7).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 29); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 3).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 19).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 9).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHR, 31); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 1).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 21); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack12_32 + static void SIMD_fastUnpack12(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 12) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec.intoArray(output, outOff+=4); + 
+ inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack13_32 + static void SIMD_fastUnpack13(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 13) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 13).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 7).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + 
outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 1).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 27); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 21); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 11).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 15).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 9).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 3).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 29); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = 
outVec.or(inVec.lanewise(VectorOperators.LSHL, 3).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 23); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 9).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 17).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 11).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 5).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 31); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 1).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 25); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 7).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 19); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack14_32 + static void SIMD_fastUnpack14(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 14) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + 
outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack15_32 + static void SIMD_fastUnpack15(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 15) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 15).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 13).and(mask); + 
outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 11).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 9).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 7).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 5).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 3).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 14).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 1).and(mask); + 
outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 31); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 1).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 29); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 3).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 27); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 25); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 7).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 23); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 9).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 21); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 11).and(mask)); + 
outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 19); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 13).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 17); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack16_32 + static void SIMD_fastUnpack16(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 16) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack17_32 + static void SIMD_fastUnpack17(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 17) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 17); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 15).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 19); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 13).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + 
outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 21); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 11).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 23); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 9).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 25); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 7).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 27); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 29); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 3).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 31); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 1).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 1).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 14).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 3).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 5).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 7).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 9).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 11).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 13).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 15); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack18_32 + static void SIMD_fastUnpack18(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 18) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 14).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, 
input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 14).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec 
= outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack19_32 + static void SIMD_fastUnpack19(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 19) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 19); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 13).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 25); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 7).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 31); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 1).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 14).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 5).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHR, 11).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 17); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 15).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 23); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 9).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 29); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 3).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 3).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 9).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec 
= outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 15); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 17).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 21); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 11).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 27); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 18).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 1).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 7).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 13); + outVec.intoArray(output, outOff+=4); + } + + // 
__SIMD_fastunpack20_32 + static void SIMD_fastUnpack20(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 20) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + 
outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack21_32 + static void SIMD_fastUnpack21(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 21) - 1; + + outVec = inVec.and(mask); + 
outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 21); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 11).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 31); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 1).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 9).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 19); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 13).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 29); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 3).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 14).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 
7).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 17); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 15).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 27); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 5).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 15); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 17).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 25); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 7).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 18).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 3).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 13); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 19).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 23); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 9).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 20).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 1).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 11); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack22_32 + static void SIMD_fastUnpack22(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 
<< 22) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 20).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 18).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 14).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 20).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = 
outVec.or(inVec.lanewise(VectorOperators.LSHL, 18).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 14).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack23_32 + static void SIMD_fastUnpack23(int[] input, int[] output) { + IntVector inVec = 
IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 23) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 23); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 9).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 18).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 5).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 19); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 13).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 22).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 1).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 15); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 17).and(mask)); + outVec.intoArray(output, 
outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 29); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 3).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 11); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 21).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 25); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 7).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 7).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 21); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 11).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 20).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 3).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 17); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 15).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 31); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 1).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 13); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 19).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 27); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = 
outVec.or(inVec.lanewise(VectorOperators.LSHL, 14).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 9); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack24_32 + static void SIMD_fastUnpack24(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 24) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); 
+ + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = 
outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack25_32 + static void SIMD_fastUnpack25(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 25) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 25); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 7).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 14).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 11); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 21).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 29); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 3).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 15); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 17).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 24).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 1).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 19); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 13).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 20).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 5).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 23); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 9).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 9); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 23).and(mask)); + 
outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 27); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 13); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 19).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 31); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 1).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 17); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 15).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 22).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 3).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 
28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 21); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 11).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 18).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 7); + outVec.intoArray(output, outOff+=4); } - // __SIMD_fastunpack2_32 - static void SIMD_fastUnpack2(int[] input, int[] output) { + // __SIMD_fastunpack26_32 + static void SIMD_fastUnpack26(int[] input, int[] output) { IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); IntVector outVec; int inOff = 0; int outOff = 0; - final int mask = (1 << 2) - 1; + final int mask = (1 << 26) - 1; outVec = inVec.and(mask); outVec.intoArray(output, outOff); + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 18).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = 
outVec.or(inVec.lanewise(VectorOperators.LSHL, 24).and(mask)); + outVec.intoArray(output, outOff+=4); + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); outVec.intoArray(output, outOff+=4); + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 22).and(mask)); + outVec.intoArray(output, outOff+=4); + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); outVec.intoArray(output, outOff+=4); + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 14).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, 
input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 20).and(mask)); + outVec.intoArray(output, outOff+=4); + outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 14).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 18).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 24).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + 
outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 22).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 14).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 20).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack27_32 + static void SIMD_fastUnpack27(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 27) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + 
outVec = inVec.lanewise(VectorOperators.LSHR, 27); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 17); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 15).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 20).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 7); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 25).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 29); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 3).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 19); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 13).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 18).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 9); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 23).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 31); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 1).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 21); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 11).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 11); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 21).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 26).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 1).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = 
inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 23); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 9).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 14).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 13); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 19).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 24).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 3).and(mask); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 25); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 7).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 15); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 17).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 22).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 5); outVec.intoArray(output, outOff+=4); + } + + // __SIMD_fastunpack28_32 + static void SIMD_fastUnpack28(int[] input, int[] output) { + IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); + IntVector outVec; + int inOff = 0; + int outOff = 0; + final int mask = (1 << 28) - 1; + + outVec = inVec.and(mask); + outVec.intoArray(output, outOff); + + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 20).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + 
inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 24).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec.intoArray(output, outOff+=4); + + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 18).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 22).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 26).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 20).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 
28).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 24).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 30).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); outVec.intoArray(output, outOff+=4); inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); - outVec = inVec.and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 20).and(mask)); 
outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 24).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 14).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 18).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 22).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 26).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = 
IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 20).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 28).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 24).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 30).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 4); outVec.intoArray(output, outOff+=4); } - // __SIMD_fastunpack3_32 - static void SIMD_fastUnpack3(int[] input, int[] output) { + // __SIMD_fastunpack29_32 + static void SIMD_fastUnpack29(int[] input, int[] output) { IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); IntVector outVec; int inOff = 0; int outOff = 0; - final int mask = (1 << 3) - 1; + final int mask = (1 << 29) - 1; outVec = inVec.and(mask); outVec.intoArray(output, outOff); - outVec = inVec.lanewise(VectorOperators.LSHR, 3).and(mask); - outVec.intoArray(output, outOff+=4); - - outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 29); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 3).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 9).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 23); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 9).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = 
inVec.lanewise(VectorOperators.LSHR, 15).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 18).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 17); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 15).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 21).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 18).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 11); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 21).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 27).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 24).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 30); + outVec = inVec.lanewise(VectorOperators.LSHR, 5); inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); - outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 3 - 1).and(mask)); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 27).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 1).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 
4).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 31); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 1).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 7).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 25); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 7).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 13).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 19); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 13).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 19).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 22).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 13); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 19).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = 
inVec.lanewise(VectorOperators.LSHR, 25).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 22).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 28).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 7); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 25).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 31); + outVec = inVec.lanewise(VectorOperators.LSHR, 4); inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); - outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 3 - 2).and(mask)); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 28).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 1).and(mask); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 5).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 27); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 11).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 14).and(mask); 
+ outVec = inVec.lanewise(VectorOperators.LSHR, 21); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 11).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 17).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 14).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 15); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 17).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 23).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 20).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 26).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 9); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 23).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 29).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 26).and(mask)); + outVec.intoArray(output, outOff+=4); + + outVec = inVec.lanewise(VectorOperators.LSHR, 3); outVec.intoArray(output, outOff+=4); } - // __SIMD_fastunpack4_32 - static void SIMD_fastUnpack4(int[] input, int[] output) { + // __SIMD_fastunpack30_32 + static void SIMD_fastUnpack30(int[] input, int[] output) { IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); 
IntVector outVec; int inOff = 0; int outOff = 0; - final int mask = (1 << 4) - 1; + final int mask = (1 << 30) - 1; outVec = inVec.and(mask); outVec.intoArray(output, outOff); - outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 28).and(mask); + outVec = 
inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 14).and(mask)); outVec.intoArray(output, outOff+=4); + outVec = inVec.lanewise(VectorOperators.LSHR, 16); inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); - - outVec = inVec.and(mask); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 18).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 20).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 22).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 24).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 26).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec = 
inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 28).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 28).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); outVec.intoArray(output, outOff+=4); inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); - outVec = inVec.and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 0).and(mask); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 26); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); outVec.intoArray(output, outOff+=4); - 
outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 20); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 28).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 14).and(mask)); outVec.intoArray(output, outOff+=4); + outVec = inVec.lanewise(VectorOperators.LSHR, 16); inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); - - outVec = inVec.and(mask); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 18).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 20).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 22).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 24).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = 
inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 26).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 28).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 28).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 2); outVec.intoArray(output, outOff+=4); } - // __SIMD_fastunpack5_32 - static void SIMD_fastUnpack5(int[] input, int[] output) { + // __SIMD_fastunpack31_32 + static void SIMD_fastUnpack31(int[] input, int[] output) { IntVector inVec = IntVector.fromArray(SPECIES_128, input, 0); IntVector outVec; int inOff = 0; int outOff = 0; - final int mask = (1 << 5) - 1; + final int mask = (1 << 31) - 1; outVec = inVec.and(mask); outVec.intoArray(output, outOff); - outVec = inVec.lanewise(VectorOperators.LSHR, 5).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 31); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 1).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 10).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 30); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 2).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 15).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 29); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 3).and(mask)); outVec.intoArray(output, outOff+=4); - outVec 
= inVec.lanewise(VectorOperators.LSHR, 20).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 28); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 4).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 25).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 27); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 30); + outVec = inVec.lanewise(VectorOperators.LSHR, 26); inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); - outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5 - 3).and(mask)); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 6).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 3).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 25); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 7).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 8).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 24); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 8).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 13).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 23); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 9).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 18).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 22); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = 
outVec.or(inVec.lanewise(VectorOperators.LSHL, 10).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 23).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 21); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 11).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 28); + outVec = inVec.lanewise(VectorOperators.LSHR, 20); inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); - outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5 - 1).and(mask)); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 12).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 1).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 19); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 13).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 6).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 18); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 14).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 11).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 17); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 15).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 16).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 16); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 16).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 21).and(mask); + outVec = 
inVec.lanewise(VectorOperators.LSHR, 15); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 17).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 26).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 14); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 18).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 31); + outVec = inVec.lanewise(VectorOperators.LSHR, 13); inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); - outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5 - 4).and(mask)); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 19).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 4).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 12); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 20).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 9).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 11); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 21).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 14).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 10); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 22).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 19).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 9); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 23).and(mask)); outVec.intoArray(output, 
outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 24).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 8); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 24).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 29).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 7); inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); - outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 5 - 2).and(mask)); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 25).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 2).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 6); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 26).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 7).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 5); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 27).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 12).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 4); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 28).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 17).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 3); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = outVec.or(inVec.lanewise(VectorOperators.LSHL, 29).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 22).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 2); + inVec = IntVector.fromArray(SPECIES_128, input, inOff+=4); + outVec = 
outVec.or(inVec.lanewise(VectorOperators.LSHL, 30).and(mask)); outVec.intoArray(output, outOff+=4); - outVec = inVec.lanewise(VectorOperators.LSHR, 27).and(mask); + outVec = inVec.lanewise(VectorOperators.LSHR, 1); outVec.intoArray(output, outOff+=4); } + } \ No newline at end of file diff --git a/src/test/java/org/apache/lucene/test/TestSimdBitPacking.java b/src/test/java/org/apache/lucene/test/TestSimdBitPacking.java index 6d92f0e..498520b 100644 --- a/src/test/java/org/apache/lucene/test/TestSimdBitPacking.java +++ b/src/test/java/org/apache/lucene/test/TestSimdBitPacking.java @@ -30,78 +30,32 @@ public class TestSimdBitPacking { final Random random = new Random(); - @Test - public void packUnpack1() { - final int bitsPerValue = 1; - int[] packed = new int[bitsPerValue * 4]; - for (int i = 0; i < 100; i++) { - int[] input = IntStream.range(0, 128).map(x -> random.nextInt(1 << bitsPerValue)).toArray(); - int[] copy = Arrays.copyOf(input, input.length); - simdPack(input, packed, bitsPerValue); - int[] unpacked = new int[128]; - simdUnpack(packed, unpacked, bitsPerValue); - assertArrayEquals(input, unpacked); - assertArrayEquals(input, copy); - } - } - - @Test - public void packUnpack2() { - final int bitsPerValue = 2; - int[] packed = new int[bitsPerValue * 4]; - for (int i = 0; i < 100; i++) { - int[] input = IntStream.range(0, 128).map(x -> random.nextInt(1 << bitsPerValue)).toArray(); - int[] copy = Arrays.copyOf(input, input.length); - simdPack(input, packed, bitsPerValue); - int[] unpacked = new int[128]; - simdUnpack(packed, unpacked, bitsPerValue); - assertArrayEquals(input, unpacked); - assertArrayEquals(input, copy); - } - } - - @Test - public void packUnpack3() { - final int bitsPerValue = 3; - int[] packed = new int[bitsPerValue * 4]; - for (int i = 0; i < 100; i++) { - int[] input = IntStream.range(0, 128).map(x -> random.nextInt(1 << bitsPerValue)).toArray(); - int[] copy = Arrays.copyOf(input, input.length); - simdPack(input, packed, bitsPerValue); - 
int[] unpacked = new int[128]; - simdUnpack(packed, unpacked, bitsPerValue); - assertArrayEquals(input, unpacked); - assertArrayEquals(input, copy); - } - } - - @Test - public void packUnpack4() { - final int bitsPerValue = 4; - int[] packed = new int[bitsPerValue * 4]; - for (int i = 0; i < 100; i++) { - int[] input = IntStream.range(0, 128).map(x -> random.nextInt(1 << bitsPerValue)).toArray(); - int[] copy = Arrays.copyOf(input, input.length); - simdPack(input, packed, bitsPerValue); - int[] unpacked = new int[128]; - simdUnpack(packed, unpacked, bitsPerValue); - assertArrayEquals(input, unpacked); - assertArrayEquals(input, copy); - } - } - @Test - public void packUnpack5() { - final int bitsPerValue = 5; - int[] packed = new int[bitsPerValue * 4]; + public void packUnpack() { + for(int bitsPerValue = 1; bitsPerValue < 31; bitsPerValue++) { + int[] packed = new int[bitsPerValue * 4]; + for (int i = 0; i < 100; i++) { + int finalBitsPerValue = bitsPerValue; + int[] input = IntStream.range(0, 128).map(x -> random.nextInt(1 << finalBitsPerValue)).toArray(); + int[] copy = Arrays.copyOf(input, input.length); + simdPack(input, packed, bitsPerValue); + int[] unpacked = new int[128]; + simdUnpack(packed, unpacked, bitsPerValue); + assertArrayEquals(input, unpacked); + assertArrayEquals(input, copy); + } + } + + int[] packed = new int[31 * 4]; for (int i = 0; i < 100; i++) { - int[] input = IntStream.range(0, 128).map(x -> random.nextInt(1 << bitsPerValue)).toArray(); + int[] input = IntStream.range(0, 128).map(x -> random.nextInt(0x40000000, Integer.MAX_VALUE)).toArray(); int[] copy = Arrays.copyOf(input, input.length); - simdPack(input, packed, bitsPerValue); + simdPack(input, packed, 31); int[] unpacked = new int[128]; - simdUnpack(packed, unpacked, bitsPerValue); + simdUnpack(packed, unpacked, 31); assertArrayEquals(input, unpacked); assertArrayEquals(input, copy); } + } }