@@ -172,8 +172,12 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
172
172
}
173
173
}
174
174
}
175
- "llvm.x86.avx2.vperm2i128" => {
175
+ "llvm.x86.avx2.vperm2i128"
176
+ | "llvm.x86.avx.vperm2f128.ps.256"
177
+ | "llvm.x86.avx.vperm2f128.pd.256" => {
176
178
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2x128_si256
179
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_ps
180
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_pd
177
181
let ( a, b, imm8) = match args {
178
182
[ a, b, imm8] => ( a, b, imm8) ,
179
183
_ => bug ! ( "wrong number of args for intrinsic {intrinsic}" ) ,
@@ -182,19 +186,11 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
182
186
let b = codegen_operand ( fx, b) ;
183
187
let imm8 = codegen_operand ( fx, imm8) . load_scalar ( fx) ;
184
188
185
- let a_0 = a. value_lane ( fx, 0 ) . load_scalar ( fx) ;
186
- let a_1 = a. value_lane ( fx, 1 ) . load_scalar ( fx) ;
187
- let a_low = fx. bcx . ins ( ) . iconcat ( a_0, a_1) ;
188
- let a_2 = a. value_lane ( fx, 2 ) . load_scalar ( fx) ;
189
- let a_3 = a. value_lane ( fx, 3 ) . load_scalar ( fx) ;
190
- let a_high = fx. bcx . ins ( ) . iconcat ( a_2, a_3) ;
189
+ let a_low = a. value_typed_lane ( fx, fx. tcx . types . u128 , 0 ) . load_scalar ( fx) ;
190
+ let a_high = a. value_typed_lane ( fx, fx. tcx . types . u128 , 1 ) . load_scalar ( fx) ;
191
191
192
- let b_0 = b. value_lane ( fx, 0 ) . load_scalar ( fx) ;
193
- let b_1 = b. value_lane ( fx, 1 ) . load_scalar ( fx) ;
194
- let b_low = fx. bcx . ins ( ) . iconcat ( b_0, b_1) ;
195
- let b_2 = b. value_lane ( fx, 2 ) . load_scalar ( fx) ;
196
- let b_3 = b. value_lane ( fx, 3 ) . load_scalar ( fx) ;
197
- let b_high = fx. bcx . ins ( ) . iconcat ( b_2, b_3) ;
192
+ let b_low = b. value_typed_lane ( fx, fx. tcx . types . u128 , 0 ) . load_scalar ( fx) ;
193
+ let b_high = b. value_typed_lane ( fx, fx. tcx . types . u128 , 1 ) . load_scalar ( fx) ;
198
194
199
195
fn select4 (
200
196
fx : & mut FunctionCx < ' _ , ' _ , ' _ > ,
@@ -219,16 +215,20 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
219
215
220
216
let control0 = imm8;
221
217
let res_low = select4 ( fx, a_high, a_low, b_high, b_low, control0) ;
222
- let ( res_0, res_1) = fx. bcx . ins ( ) . isplit ( res_low) ;
223
218
224
219
let control1 = fx. bcx . ins ( ) . ushr_imm ( imm8, 4 ) ;
225
220
let res_high = select4 ( fx, a_high, a_low, b_high, b_low, control1) ;
226
- let ( res_2, res_3) = fx. bcx . ins ( ) . isplit ( res_high) ;
227
221
228
- ret. place_lane ( fx, 0 ) . to_ptr ( ) . store ( fx, res_0, MemFlags :: trusted ( ) ) ;
229
- ret. place_lane ( fx, 1 ) . to_ptr ( ) . store ( fx, res_1, MemFlags :: trusted ( ) ) ;
230
- ret. place_lane ( fx, 2 ) . to_ptr ( ) . store ( fx, res_2, MemFlags :: trusted ( ) ) ;
231
- ret. place_lane ( fx, 3 ) . to_ptr ( ) . store ( fx, res_3, MemFlags :: trusted ( ) ) ;
222
+ ret. place_typed_lane ( fx, fx. tcx . types . u128 , 0 ) . to_ptr ( ) . store (
223
+ fx,
224
+ res_low,
225
+ MemFlags :: trusted ( ) ,
226
+ ) ;
227
+ ret. place_typed_lane ( fx, fx. tcx . types . u128 , 1 ) . to_ptr ( ) . store (
228
+ fx,
229
+ res_high,
230
+ MemFlags :: trusted ( ) ,
231
+ ) ;
232
232
}
233
233
"llvm.x86.ssse3.pabs.b.128" | "llvm.x86.ssse3.pabs.w.128" | "llvm.x86.ssse3.pabs.d.128" => {
234
234
let a = match args {
0 commit comments