diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index b60dde9dfbc6d..2819773d9f9ed 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -2000,16 +2000,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case RS6000_BIF_VCMPEQUH: case RS6000_BIF_VCMPEQUW: case RS6000_BIF_VCMPEQUD: - /* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple - folding produces worse code for 128-bit compares. */ + case RS6000_BIF_VCMPEQUT: fold_compare_helper (gsi, EQ_EXPR, stmt); return true; case RS6000_BIF_VCMPNEB: case RS6000_BIF_VCMPNEH: case RS6000_BIF_VCMPNEW: - /* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple - folding produces worse code for 128-bit compares. */ + case RS6000_BIF_VCMPNET: fold_compare_helper (gsi, NE_EXPR, stmt); return true; @@ -2021,9 +2019,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case RS6000_BIF_CMPGE_U4SI: case RS6000_BIF_CMPGE_2DI: case RS6000_BIF_CMPGE_U2DI: - /* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI - for now, because gimple folding produces worse code for 128-bit - compares. */ + case RS6000_BIF_CMPGE_1TI: + case RS6000_BIF_CMPGE_U1TI: fold_compare_helper (gsi, GE_EXPR, stmt); return true; @@ -2035,9 +2032,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case RS6000_BIF_VCMPGTUW: case RS6000_BIF_VCMPGTUD: case RS6000_BIF_VCMPGTSD: - /* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST - for now, because gimple folding produces worse code for 128-bit - compares. 
*/ + case RS6000_BIF_VCMPGTUT: + case RS6000_BIF_VCMPGTST: fold_compare_helper (gsi, GT_EXPR, stmt); return true; @@ -2049,9 +2045,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case RS6000_BIF_CMPLE_U4SI: case RS6000_BIF_CMPLE_2DI: case RS6000_BIF_CMPLE_U2DI: - /* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI - for now, because gimple folding produces worse code for 128-bit - compares. */ + case RS6000_BIF_CMPLE_1TI: + case RS6000_BIF_CMPLE_U1TI: fold_compare_helper (gsi, LE_EXPR, stmt); return true; diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 4d0797c48f8f3..a0d33d2f60443 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -26,6 +26,9 @@ ;; Vector int modes (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI]) +;; Vector int modes for comparison, shift and rotation +(define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI (V1TI "TARGET_POWER10")]) + ;; 128-bit int modes (define_mode_iterator VEC_TI [V1TI TI]) @@ -533,10 +536,10 @@ ;; For signed integer vectors comparison. (define_expand "vec_cmp<mode><mode>" - [(set (match_operand:VEC_I 0 "vint_operand") + [(set (match_operand:VEC_IC 0 "vint_operand") (match_operator 1 "signed_or_equality_comparison_operator" - [(match_operand:VEC_I 2 "vint_operand") - (match_operand:VEC_I 3 "vint_operand")]))] + [(match_operand:VEC_IC 2 "vint_operand") + (match_operand:VEC_IC 3 "vint_operand")]))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" { enum rtx_code code = GET_CODE (operands[1]); @@ -573,10 +576,10 @@ ;; For unsigned integer vectors comparison. 
(define_expand "vec_cmpu<mode><mode>" - [(set (match_operand:VEC_I 0 "vint_operand") + [(set (match_operand:VEC_IC 0 "vint_operand") (match_operator 1 "unsigned_or_equality_comparison_operator" - [(match_operand:VEC_I 2 "vint_operand") - (match_operand:VEC_I 3 "vint_operand")]))] + [(match_operand:VEC_IC 2 "vint_operand") + (match_operand:VEC_IC 3 "vint_operand")]))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" { enum rtx_code code = GET_CODE (operands[1]); @@ -690,116 +693,65 @@ ; >= for integer vectors: swap operands and apply not-greater-than (define_expand "vector_nlt<mode>" - [(set (match_operand:VEC_I 3 "vlogical_operand") - (gt:VEC_I (match_operand:VEC_I 2 "vlogical_operand") - (match_operand:VEC_I 1 "vlogical_operand"))) - (set (match_operand:VEC_I 0 "vlogical_operand") - (not:VEC_I (match_dup 3)))] + [(set (match_operand:VEC_IC 3 "vlogical_operand") + (gt:VEC_IC (match_operand:VEC_IC 2 "vlogical_operand") + (match_operand:VEC_IC 1 "vlogical_operand"))) + (set (match_operand:VEC_IC 0 "vlogical_operand") + (not:VEC_IC (match_dup 3)))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" { operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) -(define_expand "vector_nltv1ti" - [(set (match_operand:V1TI 3 "vlogical_operand") - (gt:V1TI (match_operand:V1TI 2 "vlogical_operand") - (match_operand:V1TI 1 "vlogical_operand"))) - (set (match_operand:V1TI 0 "vlogical_operand") - (not:V1TI (match_dup 3)))] - "TARGET_POWER10" -{ - operands[3] = gen_reg_rtx_and_attrs (operands[0]); -}) - (define_expand "vector_gtu<mode>" - [(set (match_operand:VEC_I 0 "vint_operand") - (gtu:VEC_I (match_operand:VEC_I 1 "vint_operand") - (match_operand:VEC_I 2 "vint_operand")))] + [(set (match_operand:VEC_IC 0 "vint_operand") + (gtu:VEC_IC (match_operand:VEC_IC 1 "vint_operand") + (match_operand:VEC_IC 2 "vint_operand")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") -(define_expand "vector_gtuv1ti" - [(set (match_operand:V1TI 0 "altivec_register_operand") - (gtu:V1TI (match_operand:V1TI 1 "altivec_register_operand") - 
(match_operand:V1TI 2 "altivec_register_operand")))] - "TARGET_POWER10" - "") - ; >= for integer vectors: swap operands and apply not-greater-than (define_expand "vector_nltu<mode>" - [(set (match_operand:VEC_I 3 "vlogical_operand") - (gtu:VEC_I (match_operand:VEC_I 2 "vlogical_operand") - (match_operand:VEC_I 1 "vlogical_operand"))) - (set (match_operand:VEC_I 0 "vlogical_operand") - (not:VEC_I (match_dup 3)))] + [(set (match_operand:VEC_IC 3 "vlogical_operand") + (gtu:VEC_IC (match_operand:VEC_IC 2 "vlogical_operand") + (match_operand:VEC_IC 1 "vlogical_operand"))) + (set (match_operand:VEC_IC 0 "vlogical_operand") + (not:VEC_IC (match_dup 3)))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" { operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) -(define_expand "vector_nltuv1ti" - [(set (match_operand:V1TI 3 "vlogical_operand") - (gtu:V1TI (match_operand:V1TI 2 "vlogical_operand") - (match_operand:V1TI 1 "vlogical_operand"))) - (set (match_operand:V1TI 0 "vlogical_operand") - (not:V1TI (match_dup 3)))] - "TARGET_POWER10" -{ - operands[3] = gen_reg_rtx_and_attrs (operands[0]); -}) - (define_expand "vector_geu<mode>" - [(set (match_operand:VEC_I 0 "vint_operand") - (geu:VEC_I (match_operand:VEC_I 1 "vint_operand") - (match_operand:VEC_I 2 "vint_operand")))] + [(set (match_operand:VEC_IC 0 "vint_operand") + (geu:VEC_IC (match_operand:VEC_IC 1 "vint_operand") + (match_operand:VEC_IC 2 "vint_operand")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") ; <= for integer vectors: apply not-greater-than (define_expand "vector_ngt<mode>" - [(set (match_operand:VEC_I 3 "vlogical_operand") - (gt:VEC_I (match_operand:VEC_I 1 "vlogical_operand") - (match_operand:VEC_I 2 "vlogical_operand"))) - (set (match_operand:VEC_I 0 "vlogical_operand") - (not:VEC_I (match_dup 3)))] + [(set (match_operand:VEC_IC 3 "vlogical_operand") + (gt:VEC_IC (match_operand:VEC_IC 1 "vlogical_operand") + (match_operand:VEC_IC 2 "vlogical_operand"))) + (set (match_operand:VEC_IC 0 "vlogical_operand") + (not:VEC_IC (match_dup 3)))] 
"VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" { operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) -(define_expand "vector_ngtv1ti" - [(set (match_operand:V1TI 3 "vlogical_operand") - (gt:V1TI (match_operand:V1TI 1 "vlogical_operand") - (match_operand:V1TI 2 "vlogical_operand"))) - (set (match_operand:V1TI 0 "vlogical_operand") - (not:V1TI (match_dup 3)))] - "TARGET_POWER10" -{ - operands[3] = gen_reg_rtx_and_attrs (operands[0]); -}) - (define_expand "vector_ngtu<mode>" - [(set (match_operand:VEC_I 3 "vlogical_operand") - (gtu:VEC_I (match_operand:VEC_I 1 "vlogical_operand") - (match_operand:VEC_I 2 "vlogical_operand"))) - (set (match_operand:VEC_I 0 "vlogical_operand") - (not:VEC_I (match_dup 3)))] + [(set (match_operand:VEC_IC 3 "vlogical_operand") + (gtu:VEC_IC (match_operand:VEC_IC 1 "vlogical_operand") + (match_operand:VEC_IC 2 "vlogical_operand"))) + (set (match_operand:VEC_IC 0 "vlogical_operand") + (not:VEC_IC (match_dup 3)))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" { operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) -(define_expand "vector_ngtuv1ti" - [(set (match_operand:V1TI 3 "vlogical_operand") - (gtu:V1TI (match_operand:V1TI 1 "vlogical_operand") - (match_operand:V1TI 2 "vlogical_operand"))) - (set (match_operand:V1TI 0 "vlogical_operand") - (not:V1TI (match_dup 3)))] - "TARGET_POWER10" -{ - operands[3] = gen_reg_rtx_and_attrs (operands[0]); -}) - ; There are 14 possible vector FP comparison operators, gt and eq of them have ; been expanded above, so just support 12 remaining operators here. 
@@ -1189,27 +1141,15 @@ (define_expand "vector_gtu_<mode>_p" [(parallel [(set (reg:CC CR6_REGNO) - (unspec:CC [(gtu:CC (match_operand:VEC_I 1 "vint_operand") - (match_operand:VEC_I 2 "vint_operand"))] + (unspec:CC [(gtu:CC (match_operand:VEC_IC 1 "vint_operand") + (match_operand:VEC_IC 2 "vint_operand"))] UNSPEC_PREDICATE)) - (set (match_operand:VEC_I 0 "vlogical_operand") - (gtu:VEC_I (match_dup 1) - (match_dup 2)))])] + (set (match_operand:VEC_IC 0 "vlogical_operand") + (gtu:VEC_IC (match_dup 1) + (match_dup 2)))])] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") -(define_expand "vector_gtu_v1ti_p" - [(parallel - [(set (reg:CC CR6_REGNO) - (unspec:CC [(gtu:CC (match_operand:V1TI 1 "altivec_register_operand") - (match_operand:V1TI 2 "altivec_register_operand"))] - UNSPEC_PREDICATE)) - (set (match_operand:V1TI 0 "altivec_register_operand") - (gtu:V1TI (match_dup 1) - (match_dup 2)))])] - "TARGET_POWER10" - "") - ;; AltiVec/VSX predicates. ;; This expansion is triggered during expansion of predicate built-in @@ -1582,25 +1522,21 @@ ;; Expanders for rotate each element in a vector (define_expand "vrotl<mode>3" - [(set (match_operand:VEC_I 0 "vint_operand") - (rotate:VEC_I (match_operand:VEC_I 1 "vint_operand") - (match_operand:VEC_I 2 "vint_operand")))] + [(set (match_operand:VEC_IC 0 "vint_operand") + (rotate:VEC_IC (match_operand:VEC_IC 1 "vint_operand") + (match_operand:VEC_IC 2 "vint_operand")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" - "") - -(define_expand "vrotlv1ti3" - [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") - (rotate:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") - (match_operand:V1TI 2 "vsx_register_operand" "v")))] - "TARGET_POWER10" { - /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. */ - rtx tmp = gen_reg_rtx (V1TImode); + /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. 
*/ + if (<MODE>mode == V1TImode) + { + rtx tmp = gen_reg_rtx (V1TImode); - emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); - emit_insn (gen_altivec_vrlq (operands[0], operands[1], tmp)); - DONE; -}) + emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); + emit_insn (gen_altivec_vrlq (operands[0], operands[1], tmp)); + DONE; + } + }) ;; Expanders for rotatert to make use of vrotl (define_expand "vrotr<mode>3" @@ -1663,25 +1599,20 @@ ;; Expanders for arithmetic shift right on each vector element (define_expand "vashr<mode>3" - [(set (match_operand:VEC_I 0 "vint_operand") - (ashiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand") - (match_operand:VEC_I 2 "vint_operand")))] + [(set (match_operand:VEC_IC 0 "vint_operand") + (ashiftrt:VEC_IC (match_operand:VEC_IC 1 "vint_operand") + (match_operand:VEC_IC 2 "vint_operand")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" - "") - -;; No immediate version of this 128-bit instruction -(define_expand "vashrv1ti3" - [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") - (ashiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") - (match_operand:V1TI 2 "vsx_register_operand" "v")))] - "TARGET_POWER10" { - /* Shift amount in needs to be put into bits[57:63] of 128-bit operand2. */ - rtx tmp = gen_reg_rtx (V1TImode); + /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. 
*/ + if (<MODE>mode == V1TImode) + { + rtx tmp = gen_reg_rtx (V1TImode); - emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); - emit_insn (gen_altivec_vsraq (operands[0], operands[1], tmp)); - DONE; + emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); + emit_insn (gen_altivec_vsraq (operands[0], operands[1], tmp)); + DONE; + } }) diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c new file mode 100644 index 0000000000000..fe667b9cb50f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c @@ -0,0 +1,87 @@ +/* Verify that overloaded built-ins for vec_cmp with __int128 + inputs produce the right code. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-require-effective-target int128 } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +#include <altivec.h> + +vector bool __int128 +test_eq (vector signed __int128 x, vector signed __int128 y) +{ + return vec_cmpeq (x, y); +} + +vector bool __int128 +testu_eq (vector unsigned __int128 x, vector unsigned __int128 y) +{ + return vec_cmpeq (x, y); +} + +vector bool __int128 +test_ge (vector signed __int128 x, vector signed __int128 y) +{ + return vec_cmpge (x, y); +} + +vector bool __int128 +testu_ge (vector unsigned __int128 x, vector unsigned __int128 y) +{ + return vec_cmpge (x, y); +} + +vector bool __int128 +test_gt (vector signed __int128 x, vector signed __int128 y) +{ + return vec_cmpgt (x, y); +} + +vector bool __int128 +testu_gt (vector unsigned __int128 x, vector unsigned __int128 y) +{ + return vec_cmpgt (x, y); +} + +vector bool __int128 +test_le (vector signed __int128 x, vector signed __int128 y) +{ + return vec_cmple (x, y); +} + +vector bool __int128 +testu_le (vector unsigned __int128 x, vector unsigned __int128 y) +{ + return vec_cmple (x, y); +} + +vector bool __int128 +test_lt (vector signed __int128 x, vector signed __int128 y) +{ + return vec_cmplt (x, y); +} + +vector bool __int128 
+testu_lt (vector unsigned __int128 x, vector unsigned __int128 y) +{ + return vec_cmplt (x, y); +} + +vector bool __int128 +test_ne (vector signed __int128 x, vector signed __int128 y) +{ + return vec_cmpne (x, y); +} + +vector bool __int128 +testu_ne (vector unsigned __int128 x, vector unsigned __int128 y) +{ + return vec_cmpne (x, y); +} + +/* { dg-final { scan-assembler-times "vcmpequq" 4 } } */ +/* { dg-final { scan-assembler-times "vcmpgtsq" 4 } } */ +/* { dg-final { scan-assembler-times "vcmpgtuq" 4 } } */ +/* { dg-final { scan-assembler-times "xxlnor" 6 } } */ + diff --git a/gcc/testsuite/gcc.target/powerpc/pr103316.c b/gcc/testsuite/gcc.target/powerpc/pr103316.c new file mode 100644 index 0000000000000..9f1ba791cc555 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr103316.c @@ -0,0 +1,81 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-require-effective-target int128 } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +vector bool __int128 +test_eq (vector signed __int128 a, vector signed __int128 b) +{ + return a == b; +} + +vector bool __int128 +test_ne (vector signed __int128 a, vector signed __int128 b) +{ + return a != b; +} + +vector bool __int128 +test_gt (vector signed __int128 a, vector signed __int128 b) +{ + return a > b; +} + +vector bool __int128 +test_ge (vector signed __int128 a, vector signed __int128 b) +{ + return a >= b; +} + +vector bool __int128 +test_lt (vector signed __int128 a, vector signed __int128 b) +{ + return a < b; +} + +vector bool __int128 +test_le (vector signed __int128 a, vector signed __int128 b) +{ + return a <= b; +} + +vector bool __int128 +testu_eq (vector unsigned __int128 a, vector unsigned __int128 b) +{ + return a == b; +} + +vector bool __int128 +testu_ne (vector unsigned __int128 a, vector unsigned __int128 b) +{ + return a != b; +} + +vector bool __int128 +testu_gt (vector unsigned __int128 a, vector unsigned __int128 b) +{ + return a > b; +} + +vector bool 
__int128 +testu_ge (vector unsigned __int128 a, vector unsigned __int128 b) +{ + return a >= b; +} + +vector bool __int128 +testu_lt (vector unsigned __int128 a, vector unsigned __int128 b) +{ + return a < b; +} + +vector bool __int128 +testu_le (vector unsigned __int128 a, vector unsigned __int128 b) +{ + return a <= b; +} + +/* { dg-final { scan-assembler-times "vcmpequq" 4 } } */ +/* { dg-final { scan-assembler-times "vcmpgtsq" 4 } } */ +/* { dg-final { scan-assembler-times "vcmpgtuq" 4 } } */ +/* { dg-final { scan-assembler-times "xxlnor" 6 } } */