@@ -413,6 +413,77 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
413
413
ret. place_lane ( fx, out_lane_idx) . write_cvalue ( fx, res_lane) ;
414
414
}
415
415
}
416
+
417
+ "llvm.x86.ssse3.pmul.hr.sw.128" => {
418
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16&ig_expand=4782
419
+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
420
+
421
+ assert_eq ! ( a. layout( ) , b. layout( ) ) ;
422
+ let layout = a. layout ( ) ;
423
+
424
+ let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
425
+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
426
+ assert_eq ! ( lane_ty, fx. tcx. types. i16 ) ;
427
+ assert_eq ! ( ret_lane_ty, fx. tcx. types. i16 ) ;
428
+ assert_eq ! ( lane_count, ret_lane_count) ;
429
+
430
+ let ret_lane_layout = fx. layout_of ( fx. tcx . types . i16 ) ;
431
+ for out_lane_idx in 0 ..lane_count {
432
+ let a_lane = a. value_lane ( fx, out_lane_idx) . load_scalar ( fx) ;
433
+ let a_lane = fx. bcx . ins ( ) . sextend ( types:: I32 , a_lane) ;
434
+ let b_lane = b. value_lane ( fx, out_lane_idx) . load_scalar ( fx) ;
435
+ let b_lane = fx. bcx . ins ( ) . sextend ( types:: I32 , b_lane) ;
436
+
437
+ let mul: Value = fx. bcx . ins ( ) . imul ( a_lane, b_lane) ;
438
+ let shifted = fx. bcx . ins ( ) . ushr_imm ( mul, 14 ) ;
439
+ let incremented = fx. bcx . ins ( ) . iadd_imm ( shifted, 1 ) ;
440
+ let shifted_again = fx. bcx . ins ( ) . ushr_imm ( incremented, 1 ) ;
441
+
442
+ let res_lane = fx. bcx . ins ( ) . ireduce ( types:: I16 , shifted_again) ;
443
+ let res_lane = CValue :: by_val ( res_lane, ret_lane_layout) ;
444
+
445
+ ret. place_lane ( fx, out_lane_idx) . write_cvalue ( fx, res_lane) ;
446
+ }
447
+ }
448
+
449
+ "llvm.x86.sse2.packuswb.128" => {
450
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16&ig_expand=4903
451
+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
452
+
453
+ assert_eq ! ( a. layout( ) , b. layout( ) ) ;
454
+ let layout = a. layout ( ) ;
455
+
456
+ let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
457
+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
458
+ assert_eq ! ( lane_ty, fx. tcx. types. i16 ) ;
459
+ assert_eq ! ( ret_lane_ty, fx. tcx. types. u8 ) ;
460
+ assert_eq ! ( lane_count * 2 , ret_lane_count) ;
461
+
462
+ let zero = fx. bcx . ins ( ) . iconst ( types:: I16 , 0 ) ;
463
+ let max_u8 = fx. bcx . ins ( ) . iconst ( types:: I16 , 255 ) ;
464
+ let ret_lane_layout = fx. layout_of ( fx. tcx . types . u8 ) ;
465
+
466
+ for idx in 0 ..lane_count {
467
+ let lane = a. value_lane ( fx, idx) . load_scalar ( fx) ;
468
+ let sat = fx. bcx . ins ( ) . smax ( lane, zero) ;
469
+ let sat = fx. bcx . ins ( ) . umin ( sat, max_u8) ;
470
+ let res = fx. bcx . ins ( ) . ireduce ( types:: I8 , sat) ;
471
+
472
+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
473
+ ret. place_lane ( fx, idx) . write_cvalue ( fx, res_lane) ;
474
+ }
475
+
476
+ for idx in 0 ..lane_count {
477
+ let lane = b. value_lane ( fx, idx) . load_scalar ( fx) ;
478
+ let sat = fx. bcx . ins ( ) . smax ( lane, zero) ;
479
+ let sat = fx. bcx . ins ( ) . umin ( sat, max_u8) ;
480
+ let res = fx. bcx . ins ( ) . ireduce ( types:: I8 , sat) ;
481
+
482
+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
483
+ ret. place_lane ( fx, lane_count + idx) . write_cvalue ( fx, res_lane) ;
484
+ }
485
+ }
486
+
416
487
_ => {
417
488
fx. tcx
418
489
. sess
0 commit comments