Skip to content

Commit

Permalink
Fix mapping
Browse files Browse the repository at this point in the history
  • Loading branch information
antoyo committed Sep 27, 2024
1 parent 7d4d356 commit fe7d918
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 4 deletions.
2 changes: 2 additions & 0 deletions src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
);
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
// TODO: remove bitcast now that vector types can be compared?
// ==> We use bitcast to avoid having to do many manual casts from e.g. __m256i to __v32qi (in
// the case of _mm256_aesenc_epi128).
self.bitcast(actual_val, expected_ty)
}
} else {
Expand Down
18 changes: 14 additions & 4 deletions src/intrinsic/llvm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,13 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
new_args[2] = builder.context.new_cast(None, new_args[2], builder.double_type);
args = new_args.into();
}
"__builtin_ia32_sqrtsh_mask_round" => {
"__builtin_ia32_sqrtsh_mask_round"
| "__builtin_ia32_vcvtss2sh_mask_round"
| "__builtin_ia32_vcvtsd2sh_mask_round"
| "__builtin_ia32_vcvtsh2ss_mask_round"
| "__builtin_ia32_vcvtsh2sd_mask_round"
| "__builtin_ia32_rcpsh_mask"
| "__builtin_ia32_rsqrtsh_mask" => {
// The first two arguments are inverted, so swap them.
let mut new_args = args.to_vec();
new_args.swap(0, 1);
Expand Down Expand Up @@ -1192,11 +1198,11 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
"llvm.x86.avx512fp16.mask.vfcmul.csh" => "__builtin_ia32_vfcmulcsh_mask_round",
"llvm.x86.avx512fp16.mask.vfmadd.cph.512" => "__builtin_ia32_vfmaddcph512_mask3_round",
"llvm.x86.avx512fp16.maskz.vfmadd.cph.512" => "__builtin_ia32_vfmaddcph512_maskz_round",
"llvm.x86.avx512fp16.mask.vfmadd.csh" => "__builtin_ia32_vfmaddcsh_mask3_round",
"llvm.x86.avx512fp16.mask.vfmadd.csh" => "__builtin_ia32_vfmaddcsh_mask_round",
"llvm.x86.avx512fp16.maskz.vfmadd.csh" => "__builtin_ia32_vfmaddcsh_maskz_round",
"llvm.x86.avx512fp16.mask.vfcmadd.cph.512" => "__builtin_ia32_vfcmaddcph512_mask3_round",
"llvm.x86.avx512fp16.maskz.vfcmadd.cph.512" => "__builtin_ia32_vfcmaddcph512_maskz_round",
"llvm.x86.avx512fp16.mask.vfcmadd.csh" => "__builtin_ia32_vfcmaddcsh_mask_round",
"llvm.x86.avx512fp16.mask.vfcmadd.csh" => "__builtin_ia32_vfcmaddcsh_mask3_round",
"llvm.x86.avx512fp16.maskz.vfcmadd.csh" => "__builtin_ia32_vfcmaddcsh_maskz_round",
"llvm.x86.avx512fp16.vfmadd.ph.512" => "__builtin_ia32_vfmaddph512_mask",
"llvm.x86.avx512fp16.vcvtsi642sh" => "__builtin_ia32_vcvtsi2sh64_round",
Expand All @@ -1209,7 +1215,7 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
"llvm.x86.avx512.mask.load.pd.256" => "__builtin_ia32_loadapd256_mask",
"llvm.x86.avx512.mask.load.d.128" => "__builtin_ia32_movdqa32load128_mask",
"llvm.x86.avx512.mask.load.q.128" => "__builtin_ia32_movdqa64load128_mask",
"llvm.x86.avx512.mask.load.ps.128" => "__builtin_ia32_movdqa64load128_mask",
"llvm.x86.avx512.mask.load.ps.128" => "__builtin_ia32_loadaps128_mask",
"llvm.x86.avx512.mask.load.pd.128" => "__builtin_ia32_loadapd128_mask",
"llvm.x86.avx512.mask.storeu.d.256" => "__builtin_ia32_storedqusi256_mask",
"llvm.x86.avx512.mask.storeu.q.256" => "__builtin_ia32_storedqudi256_mask",
Expand Down Expand Up @@ -1283,6 +1289,10 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
"llvm.x86.avx512fp16.mask.vcvttph2uqq.512" => "__builtin_ia32_vcvttph2uqq512_mask_round",
"llvm.x86.avx512fp16.mask.vcvtph2psx.512" => "__builtin_ia32_vcvtph2psx512_mask_round",
"llvm.x86.avx512fp16.mask.vcvtph2pd.512" => "__builtin_ia32_vcvtph2pd512_mask_round",
"llvm.x86.avx512fp16.mask.vfcmadd.cph.256" => "__builtin_ia32_vfcmaddcph256_mask3",
"llvm.x86.avx512fp16.mask.vfmadd.cph.256" => "__builtin_ia32_vfmaddcph256_mask3",
"llvm.x86.avx512fp16.mask.vfcmadd.cph.128" => "__builtin_ia32_vfcmaddcph128_mask3",
"llvm.x86.avx512fp16.mask.vfmadd.cph.128" => "__builtin_ia32_vfmaddcph128_mask3",

// TODO: support the tile builtins:
"llvm.x86.ldtilecfg" => "__builtin_trap",
Expand Down

0 comments on commit fe7d918

Please sign in to comment.