[PATCH, i386, Pointer Bounds Checker 31/x] Pointer Bounds Checker builtins for i386 target
Ilya Enkovich
2014-06-11 13:58:21 UTC
Hi,

This patch adds i386 target builtins for Pointer Bounds Checker.
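
For reference, the intended source-level use looks roughly like this
(a sketch only: "bnd_t" is a stand-in for the bounds type,
pointer_bounds_type_node, which has no user-level spelling in this
patch; the signatures follow the new FTYPEs):

/* Illustrative sketch, not part of the patch.  */
extern char buf[64];

void
check_access (char *p)
{
  /* Make bounds [buf, buf + 63]; the builtin takes the full object
     size and the expander passes (size - 1) to the bndmk insn.  */
  bnd_t b = __builtin_ia32_bndmk (buf, 64);

  __builtin_ia32_bndcl (p, b);      /* #BR trap if p is below LB.  */
  __builtin_ia32_bndcu (p + 3, b);  /* #BR trap if p + 3 is above UB.  */
}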

Bootstrapped and tested on linux-x86_64.

Thanks,
Ilya
--
gcc/

2014-06-11 Ilya Enkovich <***@intel.com>

* config/i386/i386-builtin-types.def (BND): New.
(ULONG): New.
(BND_FTYPE_PCVOID_ULONG): New.
(VOID_FTYPE_PCVOID_BND): New.
(VOID_FTYPE_PCVOID_BND_PCVOID): New.
(BND_FTYPE_PCVOID_PCVOID): New.
(BND_FTYPE_PCVOID): New.
(BND_FTYPE_BND_BND): New.
(PVOID_FTYPE_PVOID_PVOID_ULONG): New.
(PVOID_FTYPE_PCVOID_BND_ULONG): New.
(ULONG_FTYPE_VOID): New.
(PVOID_FTYPE_BND): New.
* config/i386/i386.c: Include tree-chkp.h, rtl-chkp.h.
(ix86_builtins): Add
IX86_BUILTIN_BNDMK, IX86_BUILTIN_BNDSTX,
IX86_BUILTIN_BNDLDX, IX86_BUILTIN_BNDCL,
IX86_BUILTIN_BNDCU, IX86_BUILTIN_BNDRET,
IX86_BUILTIN_BNDNARROW, IX86_BUILTIN_BNDINT,
IX86_BUILTIN_SIZEOF, IX86_BUILTIN_BNDLOWER,
IX86_BUILTIN_BNDUPPER.
(builtin_isa): Add leaf_p and nothrow_p fields.
(def_builtin): Initialize leaf_p and nothrow_p.
(ix86_add_new_builtins): Handle leaf_p and nothrow_p
flags.
(bdesc_mpx): New.
(bdesc_mpx_const): New.
(ix86_init_mpx_builtins): New.
(ix86_init_builtins): Call ix86_init_mpx_builtins.
(ix86_emit_move_max): New.
(ix86_expand_builtin): Expand IX86_BUILTIN_BNDMK,
IX86_BUILTIN_BNDSTX, IX86_BUILTIN_BNDLDX,
IX86_BUILTIN_BNDCL, IX86_BUILTIN_BNDCU,
IX86_BUILTIN_BNDRET, IX86_BUILTIN_BNDNARROW,
IX86_BUILTIN_BNDINT, IX86_BUILTIN_SIZEOF,
IX86_BUILTIN_BNDLOWER, IX86_BUILTIN_BNDUPPER.


diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 822c5e5..169d40a 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -47,6 +47,7 @@ DEF_PRIMITIVE_TYPE (UCHAR, unsigned_char_type_node)
DEF_PRIMITIVE_TYPE (QI, char_type_node)
DEF_PRIMITIVE_TYPE (HI, intHI_type_node)
DEF_PRIMITIVE_TYPE (SI, intSI_type_node)
+DEF_PRIMITIVE_TYPE (BND, pointer_bounds_type_node)
# ??? Logically this should be intDI_type_node, but that maps to "long"
# with 64-bit, and that's not how the emmintrin.h is written. Again,
# changing this would change name mangling.
@@ -60,6 +61,7 @@ DEF_PRIMITIVE_TYPE (USHORT, short_unsigned_type_node)
DEF_PRIMITIVE_TYPE (INT, integer_type_node)
DEF_PRIMITIVE_TYPE (UINT, unsigned_type_node)
DEF_PRIMITIVE_TYPE (UNSIGNED, unsigned_type_node)
+DEF_PRIMITIVE_TYPE (ULONG, long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (LONGLONG, long_long_integer_type_node)
DEF_PRIMITIVE_TYPE (ULONGLONG, long_long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (UINT8, unsigned_char_type_node)
@@ -806,3 +808,15 @@ DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI, TF)
DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF_V4SF, TF)
DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SI_V4SI, TF)
DEF_FUNCTION_TYPE_ALIAS (V8HI_FTYPE_V8HI_V8HI, TF)
+
+# MPX builtins
+DEF_FUNCTION_TYPE (BND, PCVOID, ULONG)
+DEF_FUNCTION_TYPE (VOID, PCVOID, BND)
+DEF_FUNCTION_TYPE (VOID, PCVOID, BND, PCVOID)
+DEF_FUNCTION_TYPE (BND, PCVOID, PCVOID)
+DEF_FUNCTION_TYPE (BND, PCVOID)
+DEF_FUNCTION_TYPE (BND, BND, BND)
+DEF_FUNCTION_TYPE (PVOID, PVOID, PVOID, ULONG)
+DEF_FUNCTION_TYPE (PVOID, PCVOID, BND, ULONG)
+DEF_FUNCTION_TYPE (ULONG, VOID)
+DEF_FUNCTION_TYPE (PVOID, BND)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 888f5ad..ac79231 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -81,6 +81,8 @@ along with GCC; see the file COPYING3. If not see
#include "context.h"
#include "pass_manager.h"
#include "target-globals.h"
+#include "tree-chkp.h"
+#include "rtl-chkp.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
@@ -28723,6 +28725,19 @@ enum ix86_builtins
IX86_BUILTIN_XABORT,
IX86_BUILTIN_XTEST,

+ /* MPX */
+ IX86_BUILTIN_BNDMK,
+ IX86_BUILTIN_BNDSTX,
+ IX86_BUILTIN_BNDLDX,
+ IX86_BUILTIN_BNDCL,
+ IX86_BUILTIN_BNDCU,
+ IX86_BUILTIN_BNDRET,
+ IX86_BUILTIN_BNDNARROW,
+ IX86_BUILTIN_BNDINT,
+ IX86_BUILTIN_SIZEOF,
+ IX86_BUILTIN_BNDLOWER,
+ IX86_BUILTIN_BNDUPPER,
+
/* BMI instructions. */
IX86_BUILTIN_BEXTR32,
IX86_BUILTIN_BEXTR64,
@@ -28796,6 +28811,8 @@ struct builtin_isa {
enum ix86_builtin_func_type tcode; /* type to use in the declaration */
HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
bool const_p; /* true if the declaration is constant */
+ bool leaf_p; /* true if the declaration has leaf attribute */
+ bool nothrow_p; /* true if the declaration has nothrow attribute */
bool set_and_not_built_p;
};

@@ -28847,6 +28864,8 @@ def_builtin (HOST_WIDE_INT mask, const char *name,
ix86_builtins[(int) code] = NULL_TREE;
ix86_builtins_isa[(int) code].tcode = tcode;
ix86_builtins_isa[(int) code].name = name;
+ ix86_builtins_isa[(int) code].leaf_p = false;
+ ix86_builtins_isa[(int) code].nothrow_p = false;
ix86_builtins_isa[(int) code].const_p = false;
ix86_builtins_isa[(int) code].set_and_not_built_p = true;
}
@@ -28897,6 +28916,11 @@ ix86_add_new_builtins (HOST_WIDE_INT isa)
ix86_builtins[i] = decl;
if (ix86_builtins_isa[i].const_p)
TREE_READONLY (decl) = 1;
+ if (ix86_builtins_isa[i].leaf_p)
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ if (ix86_builtins_isa[i].nothrow_p)
+ TREE_NOTHROW (decl) = 1;
}
}
}
@@ -30344,6 +30368,27 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
};

+/* Builtins for MPX. */
+static const struct builtin_description bdesc_mpx[] =
+{
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+};
+
+/* Const builtins for MPX. */
+static const struct builtin_description bdesc_mpx_const[] =
+{
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
+};
+
/* FMA4 and XOP. */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
@@ -31189,6 +31234,67 @@ ix86_init_mmx_sse_builtins (void)
}
}

+static void
+ix86_init_mpx_builtins ()
+{
+ const struct builtin_description * d;
+ enum ix86_builtin_func_type ftype;
+ tree decl;
+ size_t i;
+
+ for (i = 0, d = bdesc_mpx;
+ i < ARRAY_SIZE (bdesc_mpx);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ decl = def_builtin (d->mask, d->name, ftype, d->code);
+
+ /* Without leaf and nothrow flags for MPX builtins,
+ abnormal edges may follow their calls when setjmp
+ is present in the function. Since we may have a lot
+ of MPX builtin calls, this causes lots of useless
+ edges and enormous PHI nodes. To avoid this we mark
+ MPX builtins as leaf and nothrow. */
+ if (decl)
+ {
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+ else
+ {
+ ix86_builtins_isa[(int)d->code].leaf_p = true;
+ ix86_builtins_isa[(int)d->code].nothrow_p = true;
+ }
+ }
+
+ for (i = 0, d = bdesc_mpx_const;
+ i < ARRAY_SIZE (bdesc_mpx_const);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ decl = def_builtin_const (d->mask, d->name, ftype, d->code);
+
+ if (decl)
+ {
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+ else
+ {
+ ix86_builtins_isa[(int)d->code].leaf_p = true;
+ ix86_builtins_isa[(int)d->code].nothrow_p = true;
+ }
+ }
+}
+
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
to return a pointer to VERSION_DECL if the outcome of the expression
formed by PREDICATE_CHAIN is true. This function will be called during
@@ -32727,6 +32833,7 @@ ix86_init_builtins (void)

ix86_init_tm_builtins ();
ix86_init_mmx_sse_builtins ();
+ ix86_init_mpx_builtins ();

if (TARGET_LP64)
ix86_init_builtins_va_builtins_abi ();
@@ -34991,6 +35098,29 @@ ix86_expand_vec_set_builtin (tree exp)
return target;
}

+/* Choose the max of DST and SRC and put it in DST. */
+static void
+ix86_emit_move_max (rtx dst, rtx src)
+{
+ rtx t;
+
+ if (TARGET_CMOVE)
+ {
+ t = ix86_expand_compare (LTU, dst, src);
+ emit_insn (gen_rtx_SET (VOIDmode, dst,
+ gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
+ src, dst)));
+ }
+ else
+ {
+ rtx nomove = gen_label_rtx ();
+ emit_cmp_and_jump_insns (dst, src, GEU, const0_rtx,
+ GET_MODE (dst), 1, nomove);
+ emit_insn (gen_rtx_SET (VOIDmode, dst, src));
+ emit_label (nomove);
+ }
+}
+
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
(and in mode MODE if that's convenient).
@@ -35056,6 +35186,319 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,

switch (fcode)
{
+ case IX86_BUILTIN_BNDMK:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ /* Builtin arg1 is the size of the block, but instruction op1
+ should be (size - 1). */
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (fold_build2 (PLUS_EXPR, TREE_TYPE (arg1),
+ arg1, integer_minus_one_node));
+ op0 = force_reg (Pmode, op0);
+ op1 = force_reg (Pmode, op1);
+
+ emit_insn (TARGET_64BIT
+ ? gen_bnd64_mk (target, op0, op1)
+ : gen_bnd32_mk (target, op0, op1));
+ return target;
+
+ case IX86_BUILTIN_BNDSTX:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+
+ op0 = force_reg (Pmode, op0);
+ op1 = force_reg (BNDmode, op1);
+ op2 = force_reg (Pmode, op2);
+
+ emit_insn (TARGET_64BIT
+ ? gen_bnd64_stx (op2, op0, op1)
+ : gen_bnd32_stx (op2, op0, op1));
+ return 0;
+
+ case IX86_BUILTIN_BNDLDX:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ op0 = force_reg (Pmode, op0);
+ op1 = force_reg (Pmode, op1);
+
+ /* Avoid registers which cannot be used as an index. */
+ if (!index_register_operand (op1, Pmode))
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+ emit_move_insn (temp, op1);
+ op1 = temp;
+ }
+
+ /* If op1 was originally a register, it may have a
+ mode other than Pmode. We need to extend it in that
+ case because bndldx works only with Pmode regs. */
+ if (GET_MODE (op1) != Pmode)
+ op1 = ix86_zero_extend_to_Pmode (op1);
+
+ if (REG_P (target))
+ emit_insn (TARGET_64BIT
+ ? gen_bnd64_ldx (target, op0, op1)
+ : gen_bnd32_ldx (target, op0, op1));
+ else
+ {
+ rtx temp = gen_reg_rtx (BNDmode);
+ emit_insn (TARGET_64BIT
+ ? gen_bnd64_ldx (temp, op0, op1)
+ : gen_bnd32_ldx (temp, op0, op1));
+ emit_move_insn (target, temp);
+ }
+ return target;
+
+ case IX86_BUILTIN_BNDCL:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ op0 = force_reg (Pmode, op0);
+ op1 = force_reg (BNDmode, op1);
+
+ emit_insn (TARGET_64BIT
+ ? gen_bnd64_cl (op1, op0)
+ : gen_bnd32_cl (op1, op0));
+ return 0;
+
+ case IX86_BUILTIN_BNDCU:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ op0 = force_reg (Pmode, op0);
+ op1 = force_reg (BNDmode, op1);
+
+ emit_insn (TARGET_64BIT
+ ? gen_bnd64_cu (op1, op0)
+ : gen_bnd32_cu (op1, op0));
+ return 0;
+
+ case IX86_BUILTIN_BNDRET:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == SSA_NAME);
+ target = chkp_get_rtl_bounds (arg0);
+ /* If no bounds were specified for the returned value,
+ then use INIT bounds. This usually happens when
+ some built-in function is expanded. */
+ if (!target)
+ {
+ rtx t1 = gen_reg_rtx (Pmode);
+ rtx t2 = gen_reg_rtx (Pmode);
+ target = gen_reg_rtx (BNDmode);
+ emit_move_insn (t1, const0_rtx);
+ emit_move_insn (t2, constm1_rtx);
+ emit_insn (TARGET_64BIT
+ ? gen_bnd64_mk (target, t1, t2)
+ : gen_bnd32_mk (target, t1, t2));
+ }
+ gcc_assert (target && REG_P (target));
+ return target;
+
+ case IX86_BUILTIN_BNDNARROW:
+ {
+ rtx m1, m1h1, m1h2, lb, ub, t1;
+
+ /* Return value and lb. */
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ /* Bounds. */
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ /* Size. */
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ /* Size was passed but we need to use (size - 1) as for bndmk. */
+ arg2 = fold_build2 (PLUS_EXPR, TREE_TYPE (arg2), arg2,
+ integer_minus_one_node);
+
+ /* Add LB to size and inverse to get UB. */
+ arg2 = fold_build2 (PLUS_EXPR, TREE_TYPE (arg2), arg2, arg0);
+ arg2 = fold_build1 (BIT_NOT_EXPR, TREE_TYPE (arg2), arg2);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+
+ lb = force_reg (Pmode, op0);
+ ub = force_reg (Pmode, op2);
+
+ /* We need to move bounds to memory before any computations. */
+ if (!MEM_P (op1))
+ {
+ m1 = assign_stack_local (BNDmode, GET_MODE_SIZE (BNDmode), 0);
+ emit_insn (gen_move_insn (m1, op1));
+ }
+ else
+ m1 = op1;
+
+ /* Generate mem expressions to be used for access to LB and UB. */
+ m1h1 = gen_rtx_MEM (Pmode, XEXP (m1, 0));
+ m1h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m1, 0),
+ GET_MODE_SIZE (Pmode)));
+
+ t1 = gen_reg_rtx (Pmode);
+
+ /* Compute LB. */
+ emit_move_insn (t1, m1h1);
+ ix86_emit_move_max (t1, lb);
+ emit_move_insn (m1h1, t1);
+
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ emit_move_insn (t1, m1h2);
+ ix86_emit_move_max (t1, ub);
+ emit_move_insn (m1h2, t1);
+
+ op2 = gen_reg_rtx (BNDmode);
+ emit_move_insn (op2, m1);
+
+ return chkp_join_splitted_slot (op0, op2);
+ }
+
+ case IX86_BUILTIN_BNDINT:
+ {
+ unsigned bndsize = GET_MODE_SIZE (BNDmode);
+ unsigned psize = GET_MODE_SIZE (Pmode);
+ rtx res = assign_stack_local (BNDmode, bndsize, 0);
+ rtx m1, m2, m1h1, m1h2, m2h1, m2h2, t1, t2, rh1, rh2;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ /* We need to move bounds to memory before any computations. */
+ if (!MEM_P (op0))
+ {
+ m1 = assign_stack_local (BNDmode, bndsize, 0);
+ emit_insn (gen_move_insn (m1, op0));
+ }
+ else
+ m1 = op0;
+
+ if (!MEM_P (op1))
+ {
+ m2 = assign_stack_local (BNDmode, bndsize, 0);
+ emit_move_insn (m2, op1);
+ }
+ else
+ m2 = op1;
+
+ /* Generate mem expressions to be used for access to LB and UB. */
+ m1h1 = gen_rtx_MEM (Pmode, XEXP (m1, 0));
+ m1h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m1, 0), psize));
+ m2h1 = gen_rtx_MEM (Pmode, XEXP (m2, 0));
+ m2h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m2, 0), psize));
+ rh1 = gen_rtx_MEM (Pmode, XEXP (res, 0));
+ rh2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (res, 0), psize));
+
+ /* Allocate temporaries. */
+ t1 = gen_reg_rtx (Pmode);
+ t2 = gen_reg_rtx (Pmode);
+
+ /* Compute LB. */
+ emit_move_insn (t1, m1h1);
+ emit_move_insn (t2, m2h1);
+ ix86_emit_move_max (t1, t2);
+ emit_move_insn (rh1, t1);
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ emit_move_insn (t1, m1h2);
+ emit_move_insn (t2, m2h2);
+ ix86_emit_move_max (t1, t2);
+ emit_move_insn (rh2, t1);
+
+ return res;
+ }
+
+ case IX86_BUILTIN_SIZEOF:
+ {
+ enum machine_mode mode = Pmode;
+ rtx t1, t2;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == VAR_DECL);
+
+ t1 = gen_reg_rtx (mode);
+ t2 = gen_rtx_SYMBOL_REF (Pmode,
+ IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (arg0)));
+ t2 = gen_rtx_UNSPEC (mode, gen_rtvec (1, t2), UNSPEC_SIZEOF);
+
+ emit_insn (gen_rtx_SET (VOIDmode, t1, t2));
+
+ return t1;
+ }
+
+ case IX86_BUILTIN_BNDLOWER:
+ {
+ rtx mem, hmem;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (!MEM_P (op0))
+ {
+ mem = assign_stack_local (BNDmode, GET_MODE_SIZE (BNDmode), 0);
+ emit_insn (gen_move_insn (mem, op0));
+ }
+ else
+ mem = op0;
+
+ /* Generate mem expression to access LB and load it. */
+ hmem = gen_rtx_MEM (Pmode, XEXP (mem, 0));
+ target = gen_reg_rtx (Pmode);
+ emit_move_insn (target, hmem);
+
+ return target;
+ }
+
+ case IX86_BUILTIN_BNDUPPER:
+ {
+ rtx mem, hmem;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (!MEM_P (op0))
+ {
+ mem = assign_stack_local (BNDmode, GET_MODE_SIZE (BNDmode), 0);
+ emit_insn (gen_move_insn (mem, op0));
+ }
+ else
+ mem = op0;
+
+ /* Generate mem expression to access UB and load it. */
+ hmem = gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode, XEXP (mem, 0),
+ GEN_INT (GET_MODE_SIZE (Pmode))));
+ target = gen_reg_rtx (Pmode);
+ emit_move_insn (target, hmem);
+
+ /* We need to invert all bits of UB. */
+ emit_insn (gen_rtx_SET (Pmode, target, gen_rtx_NOT (Pmode, target)));
+
+ return target;
+ }
+
case IX86_BUILTIN_MASKMOVQ:
case IX86_BUILTIN_MASKMOVDQU:
icode = (fcode == IX86_BUILTIN_MASKMOVQ
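
A note on the max-for-UB trick in the bndint/bndnarrow expanders
above: the hardware keeps the upper bound in 1's complement, and
bitwise NOT is order-reversing on unsigned values, so min on the real
upper bounds equals max on the stored form. A tiny host-side sanity
check (plain C, illustrative only):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint64_t ub1 = 100, ub2 = 200;
  /* Bounds registers keep ~ub; intersecting two bounds needs
     min (ub1, ub2), which is max (~ub1, ~ub2) on the stored form.  */
  uint64_t s1 = ~ub1, s2 = ~ub2;
  uint64_t smax = s1 > s2 ? s1 : s2;
  assert (~smax == (ub1 < ub2 ? ub1 : ub2));
  return 0;
}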
Ilya Enkovich
2014-09-15 07:17:32 UTC
Ping
Post by Ilya Enkovich
Hi,
This patch adds i386 target builtins for Pointer Bounds Checker.
[...]
Uros Bizjak
2014-09-16 10:02:45 UTC
Post by Ilya Enkovich
* config/i386/i386-builtin-types.def (BND): New.
[...]
Hm, can this patch be compiled as part of the series? The expanders
refer to various gen_bnd patterns that I don't see. Also, I don't see
BND mode introduced.
Post by Ilya Enkovich
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ op0 = force_reg (Pmode, op0);
+ op1 = force_reg (Pmode, op1);
+
+ /* Avoid registers which cannot be used as an index. */
+ if (!index_register_operand (op1, Pmode))
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+ emit_move_insn (temp, op1);
+ op1 = temp;
+ }
+
+ /* If op1 was a register originally then it may have
+ mode other than Pmode. We need to extend in such
+ case because bndldx may work only with Pmode regs. */
+ if (GET_MODE (op1) != Pmode)
+ op1 = ix86_zero_extend_to_Pmode (op1);
+
+ if (REG_P (target))
+ emit_insn (TARGET_64BIT
+ ? gen_bnd64_ldx (target, op0, op1)
+ : gen_bnd32_ldx (target, op0, op1));
+ else
+ {
+ rtx temp = gen_reg_rtx (BNDmode);
+ emit_insn (TARGET_64BIT
+ ? gen_bnd64_ldx (temp, op0, op1)
+ : gen_bnd32_ldx (temp, op0, op1));
+ emit_move_insn (target, temp);
+ }
+ return target;
I don't like the way arguments are prepared. For the case above,
bnd_ldx should have the index_register_operand predicate in its
pattern, and this predicate (and its mode) should be checked in the
expander code. There are many examples of argument expansion in the
ix86_expand_builtin function, including how Pmode is handled.

Also, please see how target is handled there. Target can be null, so
the REG_P check will crash.

You should also select insn patterns depending on BNDmode, not TARGET_64BIT.

Please use assign_386_stack_local so stack slots can be shared.
SLOT_TEMP is intended for short-lived temporaries; you can introduce
new slots if you need more live values at once.
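
Roughly like this (a quick sketch, not tested; SLOT_BND_STORED is
just a suggested name for a new slot):

  /* Shared slots instead of fresh assign_stack_local calls; the new
     enum ix86_stack_slot entry covers a second simultaneously live
     bounds value.  */
  rtx m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
  rtx m2 = assign_386_stack_local (BNDmode, SLOT_BND_STORED);
  emit_move_insn (m1, op0);
  emit_move_insn (m2, op1);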

Uros.
Ilya Enkovich
2014-09-17 08:11:23 UTC
Post by Uros Bizjak
Hm, can this patch be compiled as part of the series? The expanders
refer to various gen_bnd patterns that I don't see. Also, I don't see
BND mode introduced.
Hi,

Here is a patch from the series that introduces the modes and instructions: https://gcc.gnu.org/ml/gcc-patches/2014-04/msg00880.html. It needs an update in the bndldx expander, as you suggested.
Post by Uros Bizjak
Post by Ilya Enkovich
[...]
I don't like the way arguments are prepared. For the case above,
bnd_ldx should have the index_register_operand predicate in its
pattern, and this predicate (and its mode) should be checked in the
expander code. There are many examples of argument expansion in the
ix86_expand_builtin function, including how Pmode is handled.
Also, please see how target is handled there. Target can be null, so
the REG_P check will crash.
You should also select insn patterns depending on BNDmode, not TARGET_64BIT.
Please use assign_386_stack_local so stack slots can be shared.
SLOT_TEMP is intended for short-lived temporaries; you can introduce
new slots if you need more live values at once.
Uros.
Thanks for the comments! Here is a new version in which I addressed all your concerns.

Ilya
--
2014-09-17 Ilya Enkovich <***@intel.com>

* config/i386/i386-builtin-types.def (BND): New.
(ULONG): New.
(BND_FTYPE_PCVOID_ULONG): New.
(VOID_FTYPE_PCVOID_BND): New.
(VOID_FTYPE_PCVOID_BND_PCVOID): New.
(BND_FTYPE_PCVOID_PCVOID): New.
(BND_FTYPE_PCVOID): New.
(BND_FTYPE_BND_BND): New.
(PVOID_FTYPE_PVOID_PVOID_ULONG): New.
(PVOID_FTYPE_PCVOID_BND_ULONG): New.
(ULONG_FTYPE_VOID): New.
(PVOID_FTYPE_BND): New.
* config/i386/i386.c: Include tree-chkp.h, rtl-chkp.h.
(ix86_builtins): Add
IX86_BUILTIN_BNDMK, IX86_BUILTIN_BNDSTX,
IX86_BUILTIN_BNDLDX, IX86_BUILTIN_BNDCL,
IX86_BUILTIN_BNDCU, IX86_BUILTIN_BNDRET,
IX86_BUILTIN_BNDNARROW, IX86_BUILTIN_BNDINT,
IX86_BUILTIN_SIZEOF, IX86_BUILTIN_BNDLOWER,
IX86_BUILTIN_BNDUPPER.
(builtin_isa): Add leaf_p and nothrow_p fields.
(def_builtin): Initialize leaf_p and nothrow_p.
(ix86_add_new_builtins): Handle leaf_p and nothrow_p
flags.
(bdesc_mpx): New.
(bdesc_mpx_const): New.
(ix86_init_mpx_builtins): New.
(ix86_init_builtins): Call ix86_init_mpx_builtins.
(ix86_emit_move_max): New.
(ix86_expand_builtin): Expand IX86_BUILTIN_BNDMK,
IX86_BUILTIN_BNDSTX, IX86_BUILTIN_BNDLDX,
IX86_BUILTIN_BNDCL, IX86_BUILTIN_BNDCU,
IX86_BUILTIN_BNDRET, IX86_BUILTIN_BNDNARROW,
IX86_BUILTIN_BNDINT, IX86_BUILTIN_SIZEOF,
IX86_BUILTIN_BNDLOWER, IX86_BUILTIN_BNDUPPER.
* config/i386/i386.h (ix86_stack_slot): Add SLOT_BND_STORED.

diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 35c0035..989297a 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -47,6 +47,7 @@ DEF_PRIMITIVE_TYPE (UCHAR, unsigned_char_type_node)
DEF_PRIMITIVE_TYPE (QI, char_type_node)
DEF_PRIMITIVE_TYPE (HI, intHI_type_node)
DEF_PRIMITIVE_TYPE (SI, intSI_type_node)
+DEF_PRIMITIVE_TYPE (BND, pointer_bounds_type_node)
# ??? Logically this should be intDI_type_node, but that maps to "long"
# with 64-bit, and that's not how the emmintrin.h is written. Again,
# changing this would change name mangling.
@@ -60,6 +61,7 @@ DEF_PRIMITIVE_TYPE (USHORT, short_unsigned_type_node)
DEF_PRIMITIVE_TYPE (INT, integer_type_node)
DEF_PRIMITIVE_TYPE (UINT, unsigned_type_node)
DEF_PRIMITIVE_TYPE (UNSIGNED, unsigned_type_node)
+DEF_PRIMITIVE_TYPE (ULONG, long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (LONGLONG, long_long_integer_type_node)
DEF_PRIMITIVE_TYPE (ULONGLONG, long_long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (UINT8, unsigned_char_type_node)
@@ -806,3 +808,15 @@ DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI, TF)
DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF_V4SF, TF)
DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SI_V4SI, TF)
DEF_FUNCTION_TYPE_ALIAS (V8HI_FTYPE_V8HI_V8HI, TF)
+
+# MPX builtins
+DEF_FUNCTION_TYPE (BND, PCVOID, ULONG)
+DEF_FUNCTION_TYPE (VOID, PCVOID, BND)
+DEF_FUNCTION_TYPE (VOID, PCVOID, BND, PCVOID)
+DEF_FUNCTION_TYPE (BND, PCVOID, PCVOID)
+DEF_FUNCTION_TYPE (BND, PCVOID)
+DEF_FUNCTION_TYPE (BND, BND, BND)
+DEF_FUNCTION_TYPE (PVOID, PVOID, PVOID, ULONG)
+DEF_FUNCTION_TYPE (PVOID, PCVOID, BND, ULONG)
+DEF_FUNCTION_TYPE (ULONG, VOID)
+DEF_FUNCTION_TYPE (PVOID, BND)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d0f58b1..a076450 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -85,6 +85,8 @@ along with GCC; see the file COPYING3. If not see
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
+#include "tree-chkp.h"
+#include "rtl-chkp.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
@@ -28775,6 +28777,19 @@ enum ix86_builtins
IX86_BUILTIN_XABORT,
IX86_BUILTIN_XTEST,

+ /* MPX */
+ IX86_BUILTIN_BNDMK,
+ IX86_BUILTIN_BNDSTX,
+ IX86_BUILTIN_BNDLDX,
+ IX86_BUILTIN_BNDCL,
+ IX86_BUILTIN_BNDCU,
+ IX86_BUILTIN_BNDRET,
+ IX86_BUILTIN_BNDNARROW,
+ IX86_BUILTIN_BNDINT,
+ IX86_BUILTIN_SIZEOF,
+ IX86_BUILTIN_BNDLOWER,
+ IX86_BUILTIN_BNDUPPER,
+
/* BMI instructions. */
IX86_BUILTIN_BEXTR32,
IX86_BUILTIN_BEXTR64,
@@ -28848,6 +28863,8 @@ struct builtin_isa {
enum ix86_builtin_func_type tcode; /* type to use in the declaration */
HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
bool const_p; /* true if the declaration is constant */
+ bool leaf_p; /* true if the declaration has leaf attribute */
+ bool nothrow_p; /* true if the declaration has nothrow attribute */
bool set_and_not_built_p;
};

@@ -28899,6 +28916,8 @@ def_builtin (HOST_WIDE_INT mask, const char *name,
ix86_builtins[(int) code] = NULL_TREE;
ix86_builtins_isa[(int) code].tcode = tcode;
ix86_builtins_isa[(int) code].name = name;
+ ix86_builtins_isa[(int) code].leaf_p = false;
+ ix86_builtins_isa[(int) code].nothrow_p = false;
ix86_builtins_isa[(int) code].const_p = false;
ix86_builtins_isa[(int) code].set_and_not_built_p = true;
}
@@ -28949,6 +28968,11 @@ ix86_add_new_builtins (HOST_WIDE_INT isa)
ix86_builtins[i] = decl;
if (ix86_builtins_isa[i].const_p)
TREE_READONLY (decl) = 1;
+ if (ix86_builtins_isa[i].leaf_p)
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ if (ix86_builtins_isa[i].nothrow_p)
+ TREE_NOTHROW (decl) = 1;
}
}
}
@@ -30402,6 +30426,27 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
};

+/* Builtins for MPX. */
+static const struct builtin_description bdesc_mpx[] =
+{
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+};
+
+/* Const builtins for MPX. */
+static const struct builtin_description bdesc_mpx_const[] =
+{
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
+};
+
/* FMA4 and XOP. */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
@@ -31250,6 +31295,67 @@ ix86_init_mmx_sse_builtins (void)
}
}

+static void
+ix86_init_mpx_builtins ()
+{
+ const struct builtin_description * d;
+ enum ix86_builtin_func_type ftype;
+ tree decl;
+ size_t i;
+
+ for (i = 0, d = bdesc_mpx;
+ i < ARRAY_SIZE (bdesc_mpx);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ decl = def_builtin (d->mask, d->name, ftype, d->code);
+
+ /* Without leaf and nothrow flags for MPX builtins,
+ abnormal edges may follow their calls when setjmp
+ is present in the function. Since we may have a lot
+ of MPX builtin calls, this causes lots of useless
+ edges and enormous PHI nodes. To avoid this we mark
+ MPX builtins as leaf and nothrow. */
+ if (decl)
+ {
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+ else
+ {
+ ix86_builtins_isa[(int)d->code].leaf_p = true;
+ ix86_builtins_isa[(int)d->code].nothrow_p = true;
+ }
+ }
+
+ for (i = 0, d = bdesc_mpx_const;
+ i < ARRAY_SIZE (bdesc_mpx_const);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ decl = def_builtin_const (d->mask, d->name, ftype, d->code);
+
+ if (decl)
+ {
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+ else
+ {
+ ix86_builtins_isa[(int)d->code].leaf_p = true;
+ ix86_builtins_isa[(int)d->code].nothrow_p = true;
+ }
+ }
+}
+
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
to return a pointer to VERSION_DECL if the outcome of the expression
formed by PREDICATE_CHAIN is true. This function will be called during
@@ -32788,6 +32894,7 @@ ix86_init_builtins (void)

ix86_init_tm_builtins ();
ix86_init_mmx_sse_builtins ();
+ ix86_init_mpx_builtins ();

if (TARGET_LP64)
ix86_init_builtins_va_builtins_abi ();
@@ -35053,6 +35160,29 @@ ix86_expand_vec_set_builtin (tree exp)
return target;
}

+/* Choose the max of DST and SRC and put it in DST. */
+static void
+ix86_emit_move_max (rtx dst, rtx src)
+{
+ rtx t;
+
+ if (TARGET_CMOVE)
+ {
+ t = ix86_expand_compare (LTU, dst, src);
+ emit_insn (gen_rtx_SET (VOIDmode, dst,
+ gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
+ src, dst)));
+ }
+ else
+ {
+ rtx nomove = gen_label_rtx ();
+ emit_cmp_and_jump_insns (dst, src, GEU, const0_rtx,
+ GET_MODE (dst), 1, nomove);
+ emit_insn (gen_rtx_SET (VOIDmode, dst, src));
+ emit_label (nomove);
+ }
+}
+
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
(and in mode MODE if that's convenient).
@@ -35118,6 +35248,311 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,

switch (fcode)
{
+ case IX86_BUILTIN_BNDMK:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ /* Builtin arg1 is the size of the block, but instruction op1
+ should be (size - 1). */
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (fold_build2 (PLUS_EXPR, TREE_TYPE (arg1),
+ arg1, integer_minus_one_node));
+ op0 = force_reg (Pmode, op0);
+ op1 = force_reg (Pmode, op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_mk (target, op0, op1)
+ : gen_bnd32_mk (target, op0, op1));
+ return target;
+
+ case IX86_BUILTIN_BNDSTX:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+
+ op0 = force_reg (Pmode, op0);
+ op1 = force_reg (BNDmode, op1);
+ op2 = force_reg (Pmode, op2);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_stx (op2, op0, op1)
+ : gen_bnd32_stx (op2, op0, op1));
+ return 0;
+
+ case IX86_BUILTIN_BNDLDX:
+ if (!target)
+ return 0;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ op0 = force_reg (Pmode, op0);
+ op1 = force_reg (Pmode, op1);
+
+ if (REG_P (target))
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_ldx (target, op0, op1)
+ : gen_bnd32_ldx (target, op0, op1));
+ else
+ {
+ rtx temp = gen_reg_rtx (BNDmode);
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_ldx (temp, op0, op1)
+ : gen_bnd32_ldx (temp, op0, op1));
+ emit_move_insn (target, temp);
+ }
+ return target;
+
+ case IX86_BUILTIN_BNDCL:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ op0 = force_reg (Pmode, op0);
+ op1 = force_reg (BNDmode, op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_cl (op1, op0)
+ : gen_bnd32_cl (op1, op0));
+ return 0;
+
+ case IX86_BUILTIN_BNDCU:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ op0 = force_reg (Pmode, op0);
+ op1 = force_reg (BNDmode, op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_cu (op1, op0)
+ : gen_bnd32_cu (op1, op0));
+ return 0;
+
+ case IX86_BUILTIN_BNDRET:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == SSA_NAME);
+ target = chkp_get_rtl_bounds (arg0);
+ /* If no bounds were specified for the returned value,
+ then use INIT bounds. This usually happens when
+ some built-in function is expanded. */
+ if (!target)
+ {
+ rtx t1 = gen_reg_rtx (Pmode);
+ rtx t2 = gen_reg_rtx (Pmode);
+ target = gen_reg_rtx (BNDmode);
+ emit_move_insn (t1, const0_rtx);
+ emit_move_insn (t2, constm1_rtx);
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_mk (target, t1, t2)
+ : gen_bnd32_mk (target, t1, t2));
+ }
+ gcc_assert (target && REG_P (target));
+ return target;
+
+ case IX86_BUILTIN_BNDNARROW:
+ {
+ rtx m1, m1h1, m1h2, lb, ub, t1;
+
+ /* Return value and lb. */
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ /* Bounds. */
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ /* Size. */
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ /* Size was passed but we need to use (size - 1) as for bndmk. */
+ arg2 = fold_build2 (PLUS_EXPR, TREE_TYPE (arg2), arg2,
+ integer_minus_one_node);
+
+ /* Add LB to size and inverse to get UB. */
+ arg2 = fold_build2 (PLUS_EXPR, TREE_TYPE (arg2), arg2, arg0);
+ arg2 = fold_build1 (BIT_NOT_EXPR, TREE_TYPE (arg2), arg2);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+
+ lb = force_reg (Pmode, op0);
+ ub = force_reg (Pmode, op2);
+
+ /* We need to move bounds to memory before any computations. */
+ if (!MEM_P (op1))
+ {
+ m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_insn (gen_move_insn (m1, op1));
+ }
+ else
+ m1 = op1;
+
+ /* Generate mem expressions to be used for access to LB and UB. */
+ m1h1 = gen_rtx_MEM (Pmode, XEXP (m1, 0));
+ m1h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m1, 0),
+ GET_MODE_SIZE (Pmode)));
+
+ t1 = gen_reg_rtx (Pmode);
+
+ /* Compute LB. */
+ emit_move_insn (t1, m1h1);
+ ix86_emit_move_max (t1, lb);
+ emit_move_insn (m1h1, t1);
+
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ emit_move_insn (t1, m1h2);
+ ix86_emit_move_max (t1, ub);
+ emit_move_insn (m1h2, t1);
+
+ op2 = gen_reg_rtx (BNDmode);
+ emit_move_insn (op2, m1);
+
+ return chkp_join_splitted_slot (op0, op2);
+ }
+
+ case IX86_BUILTIN_BNDINT:
+ {
+ unsigned bndsize = GET_MODE_SIZE (BNDmode);
+ unsigned psize = GET_MODE_SIZE (Pmode);
+ rtx res = assign_stack_local (BNDmode, bndsize, 0);
+ rtx m1, m2, m1h1, m1h2, m2h1, m2h2, t1, t2, rh1, rh2;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ /* We need to move bounds to memory before any computations. */
+ if (!MEM_P (op0))
+ {
+ m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (m1, op0);
+ }
+ else
+ m1 = op0;
+
+ if (!MEM_P (op1))
+ {
+ m2 = assign_386_stack_local (BNDmode,
+ MEM_P (op0)
+ ? SLOT_TEMP
+ : SLOT_BND_STORED);
+ emit_move_insn (m2, op1);
+ }
+ else
+ m2 = op1;
+
+ /* Generate mem expressions to be used for access to LB and UB. */
+ m1h1 = gen_rtx_MEM (Pmode, XEXP (m1, 0));
+ m1h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m1, 0), psize));
+ m2h1 = gen_rtx_MEM (Pmode, XEXP (m2, 0));
+ m2h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m2, 0), psize));
+ rh1 = gen_rtx_MEM (Pmode, XEXP (res, 0));
+ rh2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (res, 0), psize));
+
+ /* Allocate temporaries. */
+ t1 = gen_reg_rtx (Pmode);
+ t2 = gen_reg_rtx (Pmode);
+
+ /* Compute LB. */
+ emit_move_insn (t1, m1h1);
+ emit_move_insn (t2, m2h1);
+ ix86_emit_move_max (t1, t2);
+ emit_move_insn (rh1, t1);
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ emit_move_insn (t1, m1h2);
+ emit_move_insn (t2, m2h2);
+ ix86_emit_move_max (t1, t2);
+ emit_move_insn (rh2, t1);
+
+ return res;
+ }
+
+ case IX86_BUILTIN_SIZEOF:
+ {
+ enum machine_mode mode = Pmode;
+ rtx t1, t2;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == VAR_DECL);
+
+ t1 = gen_reg_rtx (mode);
+ t2 = gen_rtx_SYMBOL_REF (Pmode,
+ IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (arg0)));
+ t2 = gen_rtx_UNSPEC (mode, gen_rtvec (1, t2), UNSPEC_SIZEOF);
+
+ emit_insn (gen_rtx_SET (VOIDmode, t1, t2));
+
+ return t1;
+ }
+
+ case IX86_BUILTIN_BNDLOWER:
+ {
+ rtx mem, hmem;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (!MEM_P (op0))
+ {
+ mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (mem, op0);
+ }
+ else
+ mem = op0;
+
+ /* Generate mem expression to access LB and load it. */
+ hmem = gen_rtx_MEM (Pmode, XEXP (mem, 0));
+ target = gen_reg_rtx (Pmode);
+ emit_move_insn (target, hmem);
+
+ return target;
+ }
+
+ case IX86_BUILTIN_BNDUPPER:
+ {
+ rtx mem, hmem;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (!MEM_P (op0))
+ {
+ mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (mem, op0);
+ }
+ else
+ mem = op0;
+
+ /* Generate mem expression to access UB and load it. */
+ hmem = gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode, XEXP (mem, 0),
+ GEN_INT (GET_MODE_SIZE (Pmode))));
+ target = gen_reg_rtx (Pmode);
+ emit_move_insn (target, hmem);
+
+ /* We need to invert all bits of UB. */
+ emit_insn (gen_rtx_SET (Pmode, target, gen_rtx_NOT (Pmode, target)));
+
+ return target;
+ }
+
case IX86_BUILTIN_MASKMOVQ:
case IX86_BUILTIN_MASKMOVDQU:
icode = (fcode == IX86_BUILTIN_MASKMOVQ
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index a38c5d1..ededa67 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2317,6 +2317,7 @@ enum ix86_stack_slot
SLOT_CW_FLOOR,
SLOT_CW_CEIL,
SLOT_CW_MASK_PM,
+ SLOT_BND_STORED,
MAX_386_STACK_LOCALS
};
Uros Bizjak
2014-09-17 09:42:21 UTC
Post by Ilya Enkovich
Post by Uros Bizjak
Hm, can this patch be compiled as part of the series? The expanders
refer to various gen_bnd patterns that I don't see. Also, I don't see
BND mode introduced.
Hi,
Here is a patch from the series that introduces modes and instructions: https://gcc.gnu.org/ml/gcc-patches/2014-04/msg00880.html. It needs an update in the bndldx expander, as you suggested.
Post by Uros Bizjak
Post by Ilya Enkovich
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ op0 = force_reg (Pmode, op0);
+ op1 = force_reg (Pmode, op1);
+
+ /* Avoid registers which cannot be used as an index. */
+ if (!index_register_operand (op1, Pmode))
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+ emit_move_insn (temp, op1);
+ op1 = temp;
+ }
+
+ /* If op1 was originally a register, then it may have a
+ mode other than Pmode. We need to extend it in that
+ case because bndldx works only with Pmode regs. */
+ if (GET_MODE (op1) != Pmode)
+ op1 = ix86_zero_extend_to_Pmode (op1);
+
+ if (REG_P (target))
+ emit_insn (TARGET_64BIT
+ ? gen_bnd64_ldx (target, op0, op1)
+ : gen_bnd32_ldx (target, op0, op1));
+ else
+ {
+ rtx temp = gen_reg_rtx (BNDmode);
+ emit_insn (TARGET_64BIT
+ ? gen_bnd64_ldx (temp, op0, op1)
+ : gen_bnd32_ldx (temp, op0, op1));
+ emit_move_insn (target, temp);
+ }
+ return target;
I don't like the way arguments are prepared. For the case above,
bnd_ldx should have index_register_operand predicate in its pattern,
and this predicate (and its mode) should be checked in the expander
code. There are many examples of argument expansion in
ix86_expand_builtin function, including how Pmode is handled.
Also, please see how target is handled there. Target can be null, so
REG_P predicate will crash.
You should also select insn patterns depending on BNDmode, not TARGET_64BIT.
Please use assign_386_stack_local so stack slots can be shared.
SLOT_TEMP is intended for short-lived temporaries, you can introduce
new slots if you need more live values at once.
Uros.
Thanks for comments! Here is a new version in which I addressed all your concerns.
Unfortunately, it doesn't. The patch only fixed one instance w.r.t.
target handling, the one I referred to as an example. You still have
unchecked target, at least in IX86_BUILTIN_BNDMK.

However, you have general problems in your builtin expansion code,
so please look at how other builtins are handled. E.g.:

if (optimize || !target
|| GET_MODE (target) != tmode
|| !register_operand(target, tmode))
target = gen_reg_rtx (tmode);

also, here is an example of how input operands are prepared:

op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
if (!register_operand (op0, Pmode))
op0 = ix86_zero_extend_to_Pmode (op0);
if (!register_operand (op1, SImode))
op1 = copy_to_mode_reg (SImode, op1);
if (!register_operand (op2, SImode))
op2 = copy_to_mode_reg (SImode, op2);

So, Pmode is handled in a special way, even when x32 is not considered.

BTW: I wonder if word_mode is needed here; Pmode can be SImode with an
address prefix (x32).

Inside the expanders, please use expand_simple_binop and expand_unop
on RTX, not tree expressions. Again, please see many examples.
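For example, the (size - 1) adjustment can be done on RTX after
expansion instead of being folded at tree level; a minimal sketch (both
forms use only calls that already appear in this thread):

/* Tree level (current patch):  */
arg1 = fold_build2 (PLUS_EXPR, TREE_TYPE (arg1), arg1,
                    integer_minus_one_node);
op1 = expand_normal (arg1);

/* RTX level (what the review asks for):  */
op1 = expand_normal (arg1);
op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
                           op1, 1, OPTAB_DIRECT);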

Please use emit_move_insn instead of

emit_insn (gen_move_insn (m1, op1));

As a wish, can you perhaps write a generic cmove expander to be also
used in other places?
Post by Ilya Enkovich
+static void
+ix86_emit_move_max (rtx dst, rtx src)
Uros.
Post by Ilya Enkovich
Ilya
--
* config/i386/i386-builtin-types.def (BND): New.
(ULONG): New.
(BND_FTYPE_PCVOID_ULONG): New.
(VOID_FTYPE_BND_PCVOID): New.
(VOID_FTYPE_PCVOID_PCVOID_BND): New.
(BND_FTYPE_PCVOID_PCVOID): New.
(BND_FTYPE_PCVOID): New.
(BND_FTYPE_BND_BND): New.
(PVOID_FTYPE_PVOID_PVOID_ULONG): New.
(PVOID_FTYPE_PCVOID_BND_ULONG): New.
(ULONG_FTYPE_VOID): New.
(PVOID_FTYPE_BND): New.
* config/i386/i386.c: Include tree-chkp.h, rtl-chkp.h.
(ix86_builtins): Add
IX86_BUILTIN_BNDMK, IX86_BUILTIN_BNDSTX,
IX86_BUILTIN_BNDLDX, IX86_BUILTIN_BNDCL,
IX86_BUILTIN_BNDCU, IX86_BUILTIN_BNDRET,
IX86_BUILTIN_BNDNARROW, IX86_BUILTIN_BNDINT,
IX86_BUILTIN_SIZEOF, IX86_BUILTIN_BNDLOWER,
IX86_BUILTIN_BNDUPPER.
(builtin_isa): Add leaf_p and nothrow_p fields.
(def_builtin): Initialize leaf_p and nothrow_p.
(ix86_add_new_builtins): Handle leaf_p and nothrow_p
flags.
(bdesc_mpx): New.
(bdesc_mpx_const): New.
(ix86_init_mpx_builtins): New.
(ix86_init_builtins): Call ix86_init_mpx_builtins.
(ix86_emit_move_max): New.
(ix86_expand_builtin): Expand IX86_BUILTIN_BNDMK,
IX86_BUILTIN_BNDSTX, IX86_BUILTIN_BNDLDX,
IX86_BUILTIN_BNDCL, IX86_BUILTIN_BNDCU,
IX86_BUILTIN_BNDRET, IX86_BUILTIN_BNDNARROW,
IX86_BUILTIN_BNDINT, IX86_BUILTIN_SIZEOF,
IX86_BUILTIN_BNDLOWER, IX86_BUILTIN_BNDUPPER.
* config/i386/i386.h (ix86_stack_slot): Added SLOT_BND_STORED.
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 35c0035..989297a 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -47,6 +47,7 @@ DEF_PRIMITIVE_TYPE (UCHAR, unsigned_char_type_node)
DEF_PRIMITIVE_TYPE (QI, char_type_node)
DEF_PRIMITIVE_TYPE (HI, intHI_type_node)
DEF_PRIMITIVE_TYPE (SI, intSI_type_node)
+DEF_PRIMITIVE_TYPE (BND, pointer_bounds_type_node)
# ??? Logically this should be intDI_type_node, but that maps to "long"
# with 64-bit, and that's not how the emmintrin.h is written. Again,
# changing this would change name mangling.
@@ -60,6 +61,7 @@ DEF_PRIMITIVE_TYPE (USHORT, short_unsigned_type_node)
DEF_PRIMITIVE_TYPE (INT, integer_type_node)
DEF_PRIMITIVE_TYPE (UINT, unsigned_type_node)
DEF_PRIMITIVE_TYPE (UNSIGNED, unsigned_type_node)
+DEF_PRIMITIVE_TYPE (ULONG, long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (LONGLONG, long_long_integer_type_node)
DEF_PRIMITIVE_TYPE (ULONGLONG, long_long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (UINT8, unsigned_char_type_node)
@@ -806,3 +808,15 @@ DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI, TF)
DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF_V4SF, TF)
DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SI_V4SI, TF)
DEF_FUNCTION_TYPE_ALIAS (V8HI_FTYPE_V8HI_V8HI, TF)
+
+# MPX builtins
+DEF_FUNCTION_TYPE (BND, PCVOID, ULONG)
+DEF_FUNCTION_TYPE (VOID, PCVOID, BND)
+DEF_FUNCTION_TYPE (VOID, PCVOID, BND, PCVOID)
+DEF_FUNCTION_TYPE (BND, PCVOID, PCVOID)
+DEF_FUNCTION_TYPE (BND, PCVOID)
+DEF_FUNCTION_TYPE (BND, BND, BND)
+DEF_FUNCTION_TYPE (PVOID, PVOID, PVOID, ULONG)
+DEF_FUNCTION_TYPE (PVOID, PCVOID, BND, ULONG)
+DEF_FUNCTION_TYPE (ULONG, VOID)
+DEF_FUNCTION_TYPE (PVOID, BND)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d0f58b1..a076450 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -85,6 +85,8 @@ along with GCC; see the file COPYING3. If not see
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
+#include "tree-chkp.h"
+#include "rtl-chkp.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
@@ -28775,6 +28777,19 @@ enum ix86_builtins
IX86_BUILTIN_XABORT,
IX86_BUILTIN_XTEST,
+ /* MPX */
+ IX86_BUILTIN_BNDMK,
+ IX86_BUILTIN_BNDSTX,
+ IX86_BUILTIN_BNDLDX,
+ IX86_BUILTIN_BNDCL,
+ IX86_BUILTIN_BNDCU,
+ IX86_BUILTIN_BNDRET,
+ IX86_BUILTIN_BNDNARROW,
+ IX86_BUILTIN_BNDINT,
+ IX86_BUILTIN_SIZEOF,
+ IX86_BUILTIN_BNDLOWER,
+ IX86_BUILTIN_BNDUPPER,
+
/* BMI instructions. */
IX86_BUILTIN_BEXTR32,
IX86_BUILTIN_BEXTR64,
@@ -28848,6 +28863,8 @@ struct builtin_isa {
enum ix86_builtin_func_type tcode; /* type to use in the declaration */
HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
bool const_p; /* true if the declaration is constant */
+ bool leaf_p; /* true if the declaration has leaf attribute */
+ bool nothrow_p; /* true if the declaration has nothrow attribute */
bool set_and_not_built_p;
};
@@ -28899,6 +28916,8 @@ def_builtin (HOST_WIDE_INT mask, const char *name,
ix86_builtins[(int) code] = NULL_TREE;
ix86_builtins_isa[(int) code].tcode = tcode;
ix86_builtins_isa[(int) code].name = name;
+ ix86_builtins_isa[(int) code].leaf_p = false;
+ ix86_builtins_isa[(int) code].nothrow_p = false;
ix86_builtins_isa[(int) code].const_p = false;
ix86_builtins_isa[(int) code].set_and_not_built_p = true;
}
@@ -28949,6 +28968,11 @@ ix86_add_new_builtins (HOST_WIDE_INT isa)
ix86_builtins[i] = decl;
if (ix86_builtins_isa[i].const_p)
TREE_READONLY (decl) = 1;
+ if (ix86_builtins_isa[i].leaf_p)
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ if (ix86_builtins_isa[i].nothrow_p)
+ TREE_NOTHROW (decl) = 1;
}
}
}
@@ -30402,6 +30426,27 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
};
+/* Builtins for MPX. */
+static const struct builtin_description bdesc_mpx[] =
+{
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+};
+
+/* Const builtins for MPX. */
+static const struct builtin_description bdesc_mpx_const[] =
+{
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
+};
+
/* FMA4 and XOP. */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
@@ -31250,6 +31295,67 @@ ix86_init_mmx_sse_builtins (void)
}
}
+static void
+ix86_init_mpx_builtins ()
+{
+ const struct builtin_description * d;
+ enum ix86_builtin_func_type ftype;
+ tree decl;
+ size_t i;
+
+ for (i = 0, d = bdesc_mpx;
+ i < ARRAY_SIZE (bdesc_mpx);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ decl = def_builtin (d->mask, d->name, ftype, d->code);
+
+ /* Without leaf and nothrow flags for MPX builtins,
+ abnormal edges may follow their calls when setjmp
+ is present in the function. Since we may have a lot
+ of MPX builtin calls, this causes lots of useless
+ edges and enormous PHI nodes. To avoid this we mark
+ MPX builtins as leaf and nothrow. */
+ if (decl)
+ {
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+ else
+ {
+ ix86_builtins_isa[(int)d->code].leaf_p = true;
+ ix86_builtins_isa[(int)d->code].nothrow_p = true;
+ }
+ }
+
+ for (i = 0, d = bdesc_mpx_const;
+ i < ARRAY_SIZE (bdesc_mpx_const);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ decl = def_builtin_const (d->mask, d->name, ftype, d->code);
+
+ if (decl)
+ {
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+ else
+ {
+ ix86_builtins_isa[(int)d->code].leaf_p = true;
+ ix86_builtins_isa[(int)d->code].nothrow_p = true;
+ }
+ }
+}
+
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
to return a pointer to VERSION_DECL if the outcome of the expression
formed by PREDICATE_CHAIN is true. This function will be called during
@@ -32788,6 +32894,7 @@ ix86_init_builtins (void)
ix86_init_tm_builtins ();
ix86_init_mmx_sse_builtins ();
+ ix86_init_mpx_builtins ();
if (TARGET_LP64)
ix86_init_builtins_va_builtins_abi ();
@@ -35053,6 +35160,29 @@ ix86_expand_vec_set_builtin (tree exp)
return target;
}
+/* Choose max of DST and SRC and put it in DST. */
+static void
+ix86_emit_move_max (rtx dst, rtx src)
+{
+ rtx t;
+
+ if (TARGET_CMOVE)
+ {
+ t = ix86_expand_compare (LTU, dst, src);
+ emit_insn (gen_rtx_SET (VOIDmode, dst,
+ gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
+ src, dst)));
+ }
+ else
+ {
+ rtx nomove = gen_label_rtx ();
+ emit_cmp_and_jump_insns (dst, src, GEU, const0_rtx,
+ GET_MODE (dst), 1, nomove);
+ emit_insn (gen_rtx_SET (VOIDmode, dst, src));
+ emit_label (nomove);
+ }
+}
+
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
(and in mode MODE if that's convenient).
@@ -35118,6 +35248,311 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
switch (fcode)
{
+ case IX86_BUILTIN_BNDMK:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ /* Builtin arg1 is the size of the block, but instruction op1
+ should be (size - 1). */
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (fold_build2 (PLUS_EXPR, TREE_TYPE (arg1),
+ arg1, integer_minus_one_node));
+ op0 = force_reg (Pmode, op0);
+ op1 = force_reg (Pmode, op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_mk (target, op0, op1)
+ : gen_bnd32_mk (target, op0, op1));
+ return target;
+
+ case IX86_BUILTIN_BNDSTX:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+
+ op0 = force_reg (Pmode, op0);
+ op1 = force_reg (BNDmode, op1);
+ op2 = force_reg (Pmode, op2);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_stx (op2, op0, op1)
+ : gen_bnd32_stx (op2, op0, op1));
+ return 0;
+
+ case IX86_BUILTIN_BNDLDX:
+ if (!target)
+ return 0;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ op0 = force_reg (Pmode, op0);
+ op1 = force_reg (Pmode, op1);
+
+ if (REG_P (target))
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_ldx (target, op0, op1)
+ : gen_bnd32_ldx (target, op0, op1));
+ else
+ {
+ rtx temp = gen_reg_rtx (BNDmode);
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_ldx (temp, op0, op1)
+ : gen_bnd32_ldx (temp, op0, op1));
+ emit_move_insn (target, temp);
+ }
+ return target;
+
+ case IX86_BUILTIN_BNDCL:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ op0 = force_reg (Pmode, op0);
+ op1 = force_reg (BNDmode, op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_cl (op1, op0)
+ : gen_bnd32_cl (op1, op0));
+ return 0;
+
+ case IX86_BUILTIN_BNDCU:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ op0 = force_reg (Pmode, op0);
+ op1 = force_reg (BNDmode, op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_cu (op1, op0)
+ : gen_bnd32_cu (op1, op0));
+ return 0;
+
+ case IX86_BUILTIN_BNDRET:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == SSA_NAME);
+ target = chkp_get_rtl_bounds (arg0);
+ /* If no bounds were specified for the returned value,
+ then use INIT bounds. This usually happens when
+ some built-in function is expanded. */
+ if (!target)
+ {
+ rtx t1 = gen_reg_rtx (Pmode);
+ rtx t2 = gen_reg_rtx (Pmode);
+ target = gen_reg_rtx (BNDmode);
+ emit_move_insn (t1, const0_rtx);
+ emit_move_insn (t2, constm1_rtx);
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_mk (target, t1, t2)
+ : gen_bnd32_mk (target, t1, t2));
+ }
+ gcc_assert (target && REG_P (target));
+ return target;
+
+ case IX86_BUILTIN_BNDNARROW:
+ {
+ rtx m1, m1h1, m1h2, lb, ub, t1;
+
+ /* Return value and lb. */
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ /* Bounds. */
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ /* Size. */
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ /* The size was passed, but we need to use (size - 1), as for bndmk. */
+ arg2 = fold_build2 (PLUS_EXPR, TREE_TYPE (arg2), arg2,
+ integer_minus_one_node);
+
+ /* Add LB to the size and invert to get UB. */
+ arg2 = fold_build2 (PLUS_EXPR, TREE_TYPE (arg2), arg2, arg0);
+ arg2 = fold_build1 (BIT_NOT_EXPR, TREE_TYPE (arg2), arg2);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+
+ lb = force_reg (Pmode, op0);
+ ub = force_reg (Pmode, op2);
+
+ /* We need to move bounds to memory before any computations. */
+ if (!MEM_P (op1))
+ {
+ m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_insn (gen_move_insn (m1, op1));
+ }
+ else
+ m1 = op1;
+
+ /* Generate mem expression to be used for access to LB and UB. */
+ m1h1 = gen_rtx_MEM (Pmode, XEXP (m1, 0));
+ m1h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m1, 0),
+ GET_MODE_SIZE (Pmode)));
+
+ t1 = gen_reg_rtx (Pmode);
+
+ /* Compute LB. */
+ emit_move_insn (t1, m1h1);
+ ix86_emit_move_max (t1, lb);
+ emit_move_insn (m1h1, t1);
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ emit_move_insn (t1, m1h2);
+ ix86_emit_move_max (t1, ub);
+ emit_move_insn (m1h2, t1);
+
+ op2 = gen_reg_rtx (BNDmode);
+ emit_move_insn (op2, m1);
+
+ return chkp_join_splitted_slot (op0, op2);
+ }
+
+ case IX86_BUILTIN_BNDINT:
+ {
+ unsigned bndsize = GET_MODE_SIZE (BNDmode);
+ unsigned psize = GET_MODE_SIZE (Pmode);
+ rtx res = assign_stack_local (BNDmode, bndsize, 0);
+ rtx m1, m2, m1h1, m1h2, m2h1, m2h2, t1, t2, rh1, rh2;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ /* We need to move bounds to memory before any computations. */
+ if (!MEM_P (op0))
+ {
+ m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (m1, op0);
+ }
+ else
+ m1 = op0;
+
+ if (!MEM_P (op1))
+ {
+ m2 = assign_386_stack_local (BNDmode,
+ MEM_P (op0)
+ ? SLOT_TEMP
+ : SLOT_BND_STORED);
+ emit_move_insn (m2, op1);
+ }
+ else
+ m2 = op1;
+
+ /* Generate mem expression to be used for access to LB and UB. */
+ m1h1 = gen_rtx_MEM (Pmode, XEXP (m1, 0));
+ m1h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m1, 0), psize));
+ m2h1 = gen_rtx_MEM (Pmode, XEXP (m2, 0));
+ m2h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m2, 0), psize));
+ rh1 = gen_rtx_MEM (Pmode, XEXP (res, 0));
+ rh2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (res, 0), psize));
+
+ /* Allocate temporaries. */
+ t1 = gen_reg_rtx (Pmode);
+ t2 = gen_reg_rtx (Pmode);
+
+ /* Compute LB. */
+ emit_move_insn (t1, m1h1);
+ emit_move_insn (t2, m2h1);
+ ix86_emit_move_max (t1, t2);
+ emit_move_insn (rh1, t1);
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ emit_move_insn (t1, m1h2);
+ emit_move_insn (t2, m2h2);
+ ix86_emit_move_max (t1, t2);
+ emit_move_insn (rh2, t1);
+
+ return res;
+ }
+
+ case IX86_BUILTIN_SIZEOF:
+ {
+ enum machine_mode mode = Pmode;
+ rtx t1, t2;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == VAR_DECL);
+
+ t1 = gen_reg_rtx (mode);
+ t2 = gen_rtx_SYMBOL_REF (Pmode,
+ IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (arg0)));
+ t2 = gen_rtx_UNSPEC (mode, gen_rtvec (1, t2), UNSPEC_SIZEOF);
+
+ emit_insn (gen_rtx_SET (VOIDmode, t1, t2));
+
+ return t1;
+ }
+
+ case IX86_BUILTIN_BNDLOWER:
+ {
+ rtx mem, hmem;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (!MEM_P (op0))
+ {
+ mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (mem, op0);
+ }
+ else
+ mem = op0;
+
+ /* Generate mem expression to access LB and load it. */
+ hmem = gen_rtx_MEM (Pmode, XEXP (mem, 0));
+ target = gen_reg_rtx (Pmode);
+ emit_move_insn (target, hmem);
+
+ return target;
+ }
+
+ case IX86_BUILTIN_BNDUPPER:
+ {
+ rtx mem, hmem;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (!MEM_P (op0))
+ {
+ mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (mem, op0);
+ }
+ else
+ mem = op0;
+
+ /* Generate mem expression to access UB and load it. */
+ hmem = gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode, XEXP (mem, 0),
+ GEN_INT (GET_MODE_SIZE (Pmode))));
+ target = gen_reg_rtx (Pmode);
+ emit_move_insn (target, hmem);
+
+ /* We need to invert all bits of UB. */
+ emit_insn (gen_rtx_SET (Pmode, target, gen_rtx_NOT (Pmode, target)));
+
+ return target;
+ }
+
case IX86_BUILTIN_MASKMOVQ:
case IX86_BUILTIN_MASKMOVDQU:
icode = (fcode == IX86_BUILTIN_MASKMOVQ
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index a38c5d1..ededa67 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2317,6 +2317,7 @@ enum ix86_stack_slot
SLOT_CW_FLOOR,
SLOT_CW_CEIL,
SLOT_CW_MASK_PM,
+ SLOT_BND_STORED,
MAX_386_STACK_LOCALS
};
Ilya Enkovich
2014-09-17 16:31:04 UTC
Post by Uros Bizjak
Post by Ilya Enkovich
Post by Uros Bizjak
Hm, can this patch be compiled as part of the series? The expanders
refer to various gen_bnd patterns that I don't see. Also, I don't see
BND mode introduced.
Hi,
Here is a patch from the series that introduces modes and instructions: https://gcc.gnu.org/ml/gcc-patches/2014-04/msg00880.html. It needs an update in the bndldx expander, as you suggested.
Post by Uros Bizjak
Post by Ilya Enkovich
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ op0 = force_reg (Pmode, op0);
+ op1 = force_reg (Pmode, op1);
+
+ /* Avoid registers which cannot be used as an index. */
+ if (!index_register_operand (op1, Pmode))
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+ emit_move_insn (temp, op1);
+ op1 = temp;
+ }
+
+ /* If op1 was originally a register, then it may have a
+ mode other than Pmode. We need to extend it in that
+ case because bndldx works only with Pmode regs. */
+ if (GET_MODE (op1) != Pmode)
+ op1 = ix86_zero_extend_to_Pmode (op1);
+
+ if (REG_P (target))
+ emit_insn (TARGET_64BIT
+ ? gen_bnd64_ldx (target, op0, op1)
+ : gen_bnd32_ldx (target, op0, op1));
+ else
+ {
+ rtx temp = gen_reg_rtx (BNDmode);
+ emit_insn (TARGET_64BIT
+ ? gen_bnd64_ldx (temp, op0, op1)
+ : gen_bnd32_ldx (temp, op0, op1));
+ emit_move_insn (target, temp);
+ }
+ return target;
I don't like the way arguments are prepared. For the case above,
bnd_ldx should have index_register_operand predicate in its pattern,
and this predicate (and its mode) should be checked in the expander
code. There are many examples of argument expansion in
ix86_expand_builtin function, including how Pmode is handled.
Also, please see how target is handled there. Target can be null, so
REG_P predicate will crash.
You should also select insn patterns depending on BNDmode, not TARGET_64BIT.
Please use assign_386_stack_local so stack slots can be shared.
SLOT_TEMP is intended for short-lived temporaries, you can introduce
new slots if you need more live values at once.
Uros.
Thanks for comments! Here is a new version in which I addressed all your concerns.
Unfortunately, it doesn't. The patch only fixed one instance w.r.t.
target handling, the one I referred to as an example. You still have
unchecked target, at least in IX86_BUILTIN_BNDMK.
However, you have general problems in your builtin expansion code,
if (optimize || !target
|| GET_MODE (target) != tmode
|| !register_operand(target, tmode))
target = gen_reg_rtx (tmode);
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
if (!register_operand (op0, Pmode))
op0 = ix86_zero_extend_to_Pmode (op0);
if (!register_operand (op1, SImode))
op1 = copy_to_mode_reg (SImode, op1);
if (!register_operand (op2, SImode))
op2 = copy_to_mode_reg (SImode, op2);
So, Pmode is handled in a special way, even when x32 is not considered.
BTW: I wonder if word_mode is needed here; Pmode can be SImode with an
address prefix (x32).
Inside the expanders, please use expand_simple_binop and expand_unop
on RTX, not tree expressions. Again, please see many examples.
Thank you for the additional explanations. Hope this time I answered your concerns correctly :)
Post by Uros Bizjak
Please use emit_move_insn instead of
emit_insn (gen_move_insn (m1, op1));
As a wish, can you perhaps write a generic cmove expander to be also
used in other places?
I added ix86_emit_cmove as a general form of ix86_emit_move_max.
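With it, the max operation becomes a one-liner (as in the patch below):

ix86_emit_cmove (dst, src, LTU, dst, src); /* i.e. dst = max (dst, src) */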
Post by Uros Bizjak
Post by Ilya Enkovich
+static void
+ix86_emit_move_max (rtx dst, rtx src)
Uros.
Thanks,
Ilya
--
2014-09-17  Ilya Enkovich  <***@intel.com>

* config/i386/i386-builtin-types.def (BND): New.
(ULONG): New.
(BND_FTYPE_PCVOID_ULONG): New.
(VOID_FTYPE_BND_PCVOID): New.
(VOID_FTYPE_PCVOID_PCVOID_BND): New.
(BND_FTYPE_PCVOID_PCVOID): New.
(BND_FTYPE_PCVOID): New.
(BND_FTYPE_BND_BND): New.
(PVOID_FTYPE_PVOID_PVOID_ULONG): New.
(PVOID_FTYPE_PCVOID_BND_ULONG): New.
(ULONG_FTYPE_VOID): New.
(PVOID_FTYPE_BND): New.
* config/i386/i386.c: Include tree-chkp.h, rtl-chkp.h.
(ix86_builtins): Add
IX86_BUILTIN_BNDMK, IX86_BUILTIN_BNDSTX,
IX86_BUILTIN_BNDLDX, IX86_BUILTIN_BNDCL,
IX86_BUILTIN_BNDCU, IX86_BUILTIN_BNDRET,
IX86_BUILTIN_BNDNARROW, IX86_BUILTIN_BNDINT,
IX86_BUILTIN_SIZEOF, IX86_BUILTIN_BNDLOWER,
IX86_BUILTIN_BNDUPPER.
(builtin_isa): Add leaf_p and nothrow_p fields.
(def_builtin): Initialize leaf_p and nothrow_p.
(ix86_add_new_builtins): Handle leaf_p and nothrow_p
flags.
(bdesc_mpx): New.
(bdesc_mpx_const): New.
(ix86_init_mpx_builtins): New.
(ix86_init_builtins): Call ix86_init_mpx_builtins.
(ix86_emit_cmove): New.
(ix86_emit_move_max): New.
(ix86_expand_builtin): Expand IX86_BUILTIN_BNDMK,
IX86_BUILTIN_BNDSTX, IX86_BUILTIN_BNDLDX,
IX86_BUILTIN_BNDCL, IX86_BUILTIN_BNDCU,
IX86_BUILTIN_BNDRET, IX86_BUILTIN_BNDNARROW,
IX86_BUILTIN_BNDINT, IX86_BUILTIN_SIZEOF,
IX86_BUILTIN_BNDLOWER, IX86_BUILTIN_BNDUPPER.
* config/i386/i386.h (ix86_stack_slot): Added SLOT_BND_STORED.
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 35c0035..989297a 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -47,6 +47,7 @@ DEF_PRIMITIVE_TYPE (UCHAR, unsigned_char_type_node)
DEF_PRIMITIVE_TYPE (QI, char_type_node)
DEF_PRIMITIVE_TYPE (HI, intHI_type_node)
DEF_PRIMITIVE_TYPE (SI, intSI_type_node)
+DEF_PRIMITIVE_TYPE (BND, pointer_bounds_type_node)
# ??? Logically this should be intDI_type_node, but that maps to "long"
# with 64-bit, and that's not how the emmintrin.h is written. Again,
# changing this would change name mangling.
@@ -60,6 +61,7 @@ DEF_PRIMITIVE_TYPE (USHORT, short_unsigned_type_node)
DEF_PRIMITIVE_TYPE (INT, integer_type_node)
DEF_PRIMITIVE_TYPE (UINT, unsigned_type_node)
DEF_PRIMITIVE_TYPE (UNSIGNED, unsigned_type_node)
+DEF_PRIMITIVE_TYPE (ULONG, long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (LONGLONG, long_long_integer_type_node)
DEF_PRIMITIVE_TYPE (ULONGLONG, long_long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (UINT8, unsigned_char_type_node)
@@ -806,3 +808,15 @@ DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI, TF)
DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF_V4SF, TF)
DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SI_V4SI, TF)
DEF_FUNCTION_TYPE_ALIAS (V8HI_FTYPE_V8HI_V8HI, TF)
+
+# MPX builtins
+DEF_FUNCTION_TYPE (BND, PCVOID, ULONG)
+DEF_FUNCTION_TYPE (VOID, PCVOID, BND)
+DEF_FUNCTION_TYPE (VOID, PCVOID, BND, PCVOID)
+DEF_FUNCTION_TYPE (BND, PCVOID, PCVOID)
+DEF_FUNCTION_TYPE (BND, PCVOID)
+DEF_FUNCTION_TYPE (BND, BND, BND)
+DEF_FUNCTION_TYPE (PVOID, PVOID, PVOID, ULONG)
+DEF_FUNCTION_TYPE (PVOID, PCVOID, BND, ULONG)
+DEF_FUNCTION_TYPE (ULONG, VOID)
+DEF_FUNCTION_TYPE (PVOID, BND)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d0f58b1..9d8935c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -85,6 +85,8 @@ along with GCC; see the file COPYING3. If not see
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
+#include "tree-chkp.h"
+#include "rtl-chkp.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
@@ -28775,6 +28777,19 @@ enum ix86_builtins
IX86_BUILTIN_XABORT,
IX86_BUILTIN_XTEST,
+ /* MPX */
+ IX86_BUILTIN_BNDMK,
+ IX86_BUILTIN_BNDSTX,
+ IX86_BUILTIN_BNDLDX,
+ IX86_BUILTIN_BNDCL,
+ IX86_BUILTIN_BNDCU,
+ IX86_BUILTIN_BNDRET,
+ IX86_BUILTIN_BNDNARROW,
+ IX86_BUILTIN_BNDINT,
+ IX86_BUILTIN_SIZEOF,
+ IX86_BUILTIN_BNDLOWER,
+ IX86_BUILTIN_BNDUPPER,
+
/* BMI instructions. */
IX86_BUILTIN_BEXTR32,
IX86_BUILTIN_BEXTR64,
@@ -28848,6 +28863,8 @@ struct builtin_isa {
enum ix86_builtin_func_type tcode; /* type to use in the declaration */
HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
bool const_p; /* true if the declaration is constant */
+ bool leaf_p; /* true if the declaration has leaf attribute */
+ bool nothrow_p; /* true if the declaration has nothrow attribute */
bool set_and_not_built_p;
};
@@ -28899,6 +28916,8 @@ def_builtin (HOST_WIDE_INT mask, const char *name,
ix86_builtins[(int) code] = NULL_TREE;
ix86_builtins_isa[(int) code].tcode = tcode;
ix86_builtins_isa[(int) code].name = name;
+ ix86_builtins_isa[(int) code].leaf_p = false;
+ ix86_builtins_isa[(int) code].nothrow_p = false;
ix86_builtins_isa[(int) code].const_p = false;
ix86_builtins_isa[(int) code].set_and_not_built_p = true;
}
@@ -28949,6 +28968,11 @@ ix86_add_new_builtins (HOST_WIDE_INT isa)
ix86_builtins[i] = decl;
if (ix86_builtins_isa[i].const_p)
TREE_READONLY (decl) = 1;
+ if (ix86_builtins_isa[i].leaf_p)
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ if (ix86_builtins_isa[i].nothrow_p)
+ TREE_NOTHROW (decl) = 1;
}
}
}
@@ -30402,6 +30426,27 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
};
+/* Builtins for MPX. */
+static const struct builtin_description bdesc_mpx[] =
+{
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+};
+
+/* Const builtins for MPX. */
+static const struct builtin_description bdesc_mpx_const[] =
+{
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
+};
+
/* FMA4 and XOP. */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
@@ -31250,6 +31295,67 @@ ix86_init_mmx_sse_builtins (void)
}
}
+static void
+ix86_init_mpx_builtins ()
+{
+ const struct builtin_description * d;
+ enum ix86_builtin_func_type ftype;
+ tree decl;
+ size_t i;
+
+ for (i = 0, d = bdesc_mpx;
+ i < ARRAY_SIZE (bdesc_mpx);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ decl = def_builtin (d->mask, d->name, ftype, d->code);
+
+ /* Without leaf and nothrow flags for MPX builtins,
+ abnormal edges may follow their calls when setjmp
+ is present in the function. Since we may have a lot
+ of MPX builtin calls, this causes lots of useless
+ edges and enormous PHI nodes. To avoid this we mark
+ MPX builtins as leaf and nothrow. */
+ if (decl)
+ {
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+ else
+ {
+ ix86_builtins_isa[(int)d->code].leaf_p = true;
+ ix86_builtins_isa[(int)d->code].nothrow_p = true;
+ }
+ }
+
+ for (i = 0, d = bdesc_mpx_const;
+ i < ARRAY_SIZE (bdesc_mpx_const);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ decl = def_builtin_const (d->mask, d->name, ftype, d->code);
+
+ if (decl)
+ {
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+ else
+ {
+ ix86_builtins_isa[(int)d->code].leaf_p = true;
+ ix86_builtins_isa[(int)d->code].nothrow_p = true;
+ }
+ }
+}
+
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
to return a pointer to VERSION_DECL if the outcome of the expression
formed by PREDICATE_CHAIN is true. This function will be called during
@@ -32788,6 +32894,7 @@ ix86_init_builtins (void)
ix86_init_tm_builtins ();
ix86_init_mmx_sse_builtins ();
+ ix86_init_mpx_builtins ();
if (TARGET_LP64)
ix86_init_builtins_va_builtins_abi ();
@@ -35053,6 +35160,37 @@ ix86_expand_vec_set_builtin (tree exp)
return target;
}
+/* Emit conditional move of SRC to DST with condition
+ OP1 CODE OP2. */
+static void
+ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
+{
+ rtx t;
+
+ if (TARGET_CMOVE)
+ {
+ t = ix86_expand_compare (code, op1, op2);
+ emit_insn (gen_rtx_SET (VOIDmode, dst,
+ gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
+ src, dst)));
+ }
+ else
+ {
+ rtx nomove = gen_label_rtx ();
+ emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
+ const0_rtx, GET_MODE (op1), 1, nomove);
+ emit_move_insn (dst, src);
+ emit_label (nomove);
+ }
+}
+
+/* Choose max of DST and SRC and put it in DST. */
+static void
+ix86_emit_move_max (rtx dst, rtx src)
+{
+ ix86_emit_cmove (dst, src, LTU, dst, src);
+}
+
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
(and in mode MODE if that's convenient).
@@ -35118,6 +35256,344 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
switch (fcode)
{
+ case IX86_BUILTIN_BNDMK:
+ if (!target
+ || GET_MODE (target) != BNDmode
+ || !register_operand (target, BNDmode))
+ target = gen_reg_rtx (BNDmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, Pmode))
+ op1 = ix86_zero_extend_to_Pmode (op1);
+
+ /* Builtin arg1 is the size of the block, but instruction op1
+ should be (size - 1). */
+ op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
+ op1, 1, OPTAB_DIRECT);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_mk (target, op0, op1)
+ : gen_bnd32_mk (target, op0, op1));
+ return target;
+
+ case IX86_BUILTIN_BNDSTX:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, BNDmode))
+ op1 = copy_to_mode_reg (BNDmode, op1);
+ if (!register_operand (op2, Pmode))
+ op2 = ix86_zero_extend_to_Pmode (op2);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_stx (op2, op0, op1)
+ : gen_bnd32_stx (op2, op0, op1));
+ return 0;
+
+ case IX86_BUILTIN_BNDLDX:
+ if (!target
+ || GET_MODE (target) != BNDmode
+ || !register_operand (target, BNDmode))
+ target = gen_reg_rtx (BNDmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, Pmode))
+ op1 = ix86_zero_extend_to_Pmode (op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_ldx (target, op0, op1)
+ : gen_bnd32_ldx (target, op0, op1));
+ return target;
+
+ case IX86_BUILTIN_BNDCL:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, BNDmode))
+ op1 = copy_to_mode_reg (BNDmode, op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_cl (op1, op0)
+ : gen_bnd32_cl (op1, op0));
+ return 0;
+
+ case IX86_BUILTIN_BNDCU:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, BNDmode))
+ op1 = copy_to_mode_reg (BNDmode, op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_cu (op1, op0)
+ : gen_bnd32_cu (op1, op0));
+ return 0;
+
+ case IX86_BUILTIN_BNDRET:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == SSA_NAME);
+ target = chkp_get_rtl_bounds (arg0);
+ /* If no bounds were specified for the returned value,
+ then use INIT bounds. This usually happens when
+ some built-in function is expanded. */
+ if (!target)
+ {
+ rtx t1 = gen_reg_rtx (Pmode);
+ rtx t2 = gen_reg_rtx (Pmode);
+ target = gen_reg_rtx (BNDmode);
+ emit_move_insn (t1, const0_rtx);
+ emit_move_insn (t2, constm1_rtx);
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_mk (target, t1, t2)
+ : gen_bnd32_mk (target, t1, t2));
+ }
+ gcc_assert (target && REG_P (target));
+ return target;
+
+ case IX86_BUILTIN_BNDNARROW:
+ {
+ rtx m1, m1h1, m1h2, lb, ub, t1;
+
+ /* Return value and lb. */
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ /* Bounds. */
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ /* Size. */
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ lb = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+
+ /* The size was passed, but we need to use (size - 1), as for bndmk. */
+ op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
+ op2, 1, OPTAB_DIRECT);
+
+ /* Add LB to the size and invert to get UB. */
+ op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
+ op2, 1, OPTAB_DIRECT);
+ ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
+
+ if (!register_operand (lb, Pmode))
+ lb = ix86_zero_extend_to_Pmode (lb);
+ if (!register_operand (op2, Pmode))
+ ub = ix86_zero_extend_to_Pmode (op2);
+
+ /* We need to move bounds to memory before any computations. */
+ if (!MEM_P (op1))
+ {
+ m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (m1, op1);
+ }
+ else
+ m1 = op1;
+
+ /* Generate mem expression to be used for access to LB and UB. */
+ m1h1 = gen_rtx_MEM (Pmode, XEXP (m1, 0));
+ m1h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m1, 0),
+ GET_MODE_SIZE (Pmode)));
+
+ t1 = gen_reg_rtx (Pmode);
+
+ /* Compute LB. */
+ emit_move_insn (t1, m1h1);
+ ix86_emit_move_max (t1, lb);
+ emit_move_insn (m1h1, t1);
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ emit_move_insn (t1, m1h2);
+ ix86_emit_move_max (t1, ub);
+ emit_move_insn (m1h2, t1);
+
+ op2 = gen_reg_rtx (BNDmode);
+ emit_move_insn (op2, m1);
+
+ return chkp_join_splitted_slot (lb, op2);
+ }
+
+ case IX86_BUILTIN_BNDINT:
+ {
+ unsigned bndsize = GET_MODE_SIZE (BNDmode);
+ unsigned psize = GET_MODE_SIZE (Pmode);
+ rtx res, m1, m2, m1h1, m1h2, m2h1, m2h2, t1, t2, rh1, rh2;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ /* We need to move bounds to memory before any computations. */
+ if (!MEM_P (op0))
+ {
+ m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (m1, op0);
+ }
+ else
+ m1 = op0;
+
+ if (!MEM_P (op1))
+ {
+ m2 = assign_386_stack_local (BNDmode,
+ MEM_P (op0)
+ ? SLOT_TEMP
+ : SLOT_BND_STORED);
+ emit_move_insn (m2, op1);
+ }
+ else
+ m2 = op1;
+
+ if (!MEM_P (op0))
+ res = m1;
+ else if (!MEM_P (op1))
+ res = m2;
+ else
+ res = assign_386_stack_local (BNDmode, SLOT_TEMP);
+
+ /* Generate mem expression to be used for access to LB and UB. */
+ m1h1 = gen_rtx_MEM (Pmode, XEXP (m1, 0));
+ m1h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m1, 0), psize));
+ m2h1 = gen_rtx_MEM (Pmode, XEXP (m2, 0));
+ m2h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m2, 0), psize));
+ rh1 = gen_rtx_MEM (Pmode, XEXP (res, 0));
+ rh2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (res, 0), psize));
+
+ /* Allocate temporaries. */
+ t1 = gen_reg_rtx (Pmode);
+ t2 = gen_reg_rtx (Pmode);
+
+ /* Compute LB. */
+ emit_move_insn (t1, m1h1);
+ emit_move_insn (t2, m2h1);
+ ix86_emit_move_max (t1, t2);
+ emit_move_insn (rh1, t1);
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ emit_move_insn (t1, m1h2);
+ emit_move_insn (t2, m2h2);
+ ix86_emit_move_max (t1, t2);
+ emit_move_insn (rh2, t1);
+
+ return res;
+ }
+
+ case IX86_BUILTIN_SIZEOF:
+ {
+ enum machine_mode mode = Pmode;
+ tree name;
+ rtx temp;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == VAR_DECL);
+
+ name = DECL_ASSEMBLER_NAME (arg0);
+ temp = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
+ temp = gen_rtx_UNSPEC (mode, gen_rtvec (1, temp), UNSPEC_SIZEOF);
+
+ emit_move_insn (target, temp);
+
+ return target;
+ }
+
+ case IX86_BUILTIN_BNDLOWER:
+ {
+ rtx mem, hmem;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (!MEM_P (op0))
+ {
+ mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (mem, op0);
+ }
+ else
+ mem = op0;
+
+ /* Generate mem expression to access LB and load it. */
+ hmem = gen_rtx_MEM (Pmode, XEXP (mem, 0));
+ emit_move_insn (target, hmem);
+
+ return target;
+ }
+
+ case IX86_BUILTIN_BNDUPPER:
+ {
+ rtx mem, hmem;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (!MEM_P (op0))
+ {
+ mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (mem, op0);
+ }
+ else
+ mem = op0;
+
+ /* Generate mem expression to access UB and load it. */
+ hmem = gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode, XEXP (mem, 0),
+ GEN_INT (GET_MODE_SIZE (Pmode))));
+ emit_move_insn (target, hmem);
+
+ /* We need to invert all bits of UB. */
+ emit_move_insn (target, gen_rtx_NOT (Pmode, target));
+
+ return target;
+ }
+
case IX86_BUILTIN_MASKMOVQ:
case IX86_BUILTIN_MASKMOVDQU:
icode = (fcode == IX86_BUILTIN_MASKMOVQ
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index a38c5d1..ededa67 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2317,6 +2317,7 @@ enum ix86_stack_slot
SLOT_CW_FLOOR,
SLOT_CW_CEIL,
SLOT_CW_MASK_PM,
+ SLOT_BND_STORED,
MAX_386_STACK_LOCALS
};
Uros Bizjak
2014-09-17 18:06:50 UTC
Post by Ilya Enkovich
Post by Uros Bizjak
Post by Ilya Enkovich
Post by Uros Bizjak
I don't like the way arguments are prepared. For the case above,
bnd_ldx should have index_register_operand predicate in its pattern,
and this predicate (and its mode) should be checked in the expander
code. There are many examples of argument expansion in
ix86_expand_builtin function, including how Pmode is handled.
Also, please see how target is handled there. Target can be null, so
REG_P predicate will crash.
You should also select insn patterns depending on BNDmode, not TARGET_64BIT.
Please use assign_386_stack_local so stack slots can be shared.
SLOT_TEMP is intended for short-lived temporaries, you can introduce
new slots if you need more live values at once.
Uros.
Thanks for comments! Here is a new version in which I addressed all your concerns.
Unfortunately, it doesn't. The patch only fixed one instance w.r.t.
target handling, the one I referred to as an example. You still have
unchecked target, at least in IX86_BUILTIN_BNDMK.
However, you have general problems in your builtin expansion code,
if (optimize || !target
|| GET_MODE (target) != tmode
|| !register_operand(target, tmode))
target = gen_reg_rtx (tmode);
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
if (!register_operand (op0, Pmode))
op0 = ix86_zero_extend_to_Pmode (op0);
if (!register_operand (op1, SImode))
op1 = copy_to_mode_reg (SImode, op1);
if (!register_operand (op2, SImode))
op2 = copy_to_mode_reg (SImode, op2);
So, Pmode is handled in a special way, even when x32 is not considered.
BTW: I wonder if word_mode is needed here; Pmode can be SImode with an
address prefix (x32).
Inside the expanders, please use expand_simple_binop and expand_unop
on RTX, not tree expressions. Again, please see many examples.
Thank you for the additional explanations. Hope this time I answered your concerns correctly :)
Yes, this version is MUCH better. There are further comments below on the code.
Post by Ilya Enkovich
* config/i386/i386-builtin-types.def (BND): New.
(ULONG): New.
(BND_FTYPE_PCVOID_ULONG): New.
(VOID_FTYPE_BND_PCVOID): New.
(VOID_FTYPE_PCVOID_PCVOID_BND): New.
(BND_FTYPE_PCVOID_PCVOID): New.
(BND_FTYPE_PCVOID): New.
(BND_FTYPE_BND_BND): New.
(PVOID_FTYPE_PVOID_PVOID_ULONG): New.
(PVOID_FTYPE_PCVOID_BND_ULONG): New.
(ULONG_FTYPE_VOID): New.
(PVOID_FTYPE_BND): New.
* config/i386/i386.c: Include tree-chkp.h, rtl-chkp.h.
(ix86_builtins): Add
IX86_BUILTIN_BNDMK, IX86_BUILTIN_BNDSTX,
IX86_BUILTIN_BNDLDX, IX86_BUILTIN_BNDCL,
IX86_BUILTIN_BNDCU, IX86_BUILTIN_BNDRET,
IX86_BUILTIN_BNDNARROW, IX86_BUILTIN_BNDINT,
IX86_BUILTIN_SIZEOF, IX86_BUILTIN_BNDLOWER,
IX86_BUILTIN_BNDUPPER.
(builtin_isa): Add leaf_p and nothrow_p fields.
(def_builtin): Initialize leaf_p and nothrow_p.
(ix86_add_new_builtins): Handle leaf_p and nothrow_p
flags.
(bdesc_mpx): New.
(bdesc_mpx_const): New.
(ix86_init_mpx_builtins): New.
(ix86_init_builtins): Call ix86_init_mpx_builtins.
(ix86_emit_cmove): New.
(ix86_emit_move_max): New.
(ix86_expand_builtin): Expand IX86_BUILTIN_BNDMK,
IX86_BUILTIN_BNDSTX, IX86_BUILTIN_BNDLDX,
IX86_BUILTIN_BNDCL, IX86_BUILTIN_BNDCU,
IX86_BUILTIN_BNDRET, IX86_BUILTIN_BNDNARROW,
IX86_BUILTIN_BNDINT, IX86_BUILTIN_SIZEOF,
IX86_BUILTIN_BNDLOWER, IX86_BUILTIN_BNDUPPER.
* config/i386/i386.h (ix86_stack_slot): Added SLOT_BND_STORED.
..
Post by Ilya Enkovich
+ /* We need to move bounds to memory before any computations. */
+ if (!MEM_P (op1))
+ {
+ m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (m1, op1);
+ }
+ else
+ m1 = op1;
No negative conditions, please. Just swap the arms of the if statement. It
is much more readable.
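i.e., a sketch of the preferred shape, built from the patch's own statements:

if (MEM_P (op1))
  m1 = op1;
else
  {
    m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
    emit_move_insn (m1, op1);
  }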
Post by Ilya Enkovich
+
+ /* Generate mem expression to be used for access to LB and UB. */
+ m1h1 = gen_rtx_MEM (Pmode, XEXP (m1, 0));
+ m1h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m1, 0),
+ GET_MODE_SIZE (Pmode)));
Please use adjust_address instead of manually producing MEMs.
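For the two halves that would be, roughly:

m1h1 = adjust_address (m1, Pmode, 0);
m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));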
Post by Ilya Enkovich
+
+ t1 = gen_reg_rtx (Pmode);
+
+ /* Compute LB. */
+ emit_move_insn (t1, m1h1);
+ ix86_emit_move_max (t1, lb);
+ emit_move_insn (m1h1, t1);
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ emit_move_insn (t1, m1h2);
+ ix86_emit_move_max (t1, ub);
+ emit_move_insn (m1h2, t1);
+
+ op2 = gen_reg_rtx (BNDmode);
+ emit_move_insn (op2, m1);
+
+ return chkp_join_splitted_slot (lb, op2);
+ }
+
The handling in this builtin looks strange.

I suggest doing it in a serial way, like this:

if (MEM_P (op0))
m0 = op0;
else
{
m0 = stack (SLOT_TEMP)
move (op0, m0)
}

move parts of m0 to temporaries.

if (MEM_P (op1))
m1 = op1;
else
{
m1 = stack (SLOT_TEMP)
move (op1, m1)
}

move parts of m1 to other temporaries.

Process temporaries.

res = stack (SLOT_BND_STORED)

move calculated stuff to res.

This will ensure that nobody will clobber your stack slot with the
result, assuming the consumer will use it soon. SLOT_TEMP is a short-lived
slot, and can be reused.
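In code, the scheme could look roughly like this (a sketch only; lb0,
ub0, lb1 and ub1 are hypothetical pseudos, everything else is taken
from the patch, with psize = GET_MODE_SIZE (Pmode)):

if (MEM_P (op0))
  m0 = op0;
else
  {
    m0 = assign_386_stack_local (BNDmode, SLOT_TEMP);
    emit_move_insn (m0, op0);
  }
/* Copy both halves into pseudos so SLOT_TEMP can be reused.  */
lb0 = copy_to_mode_reg (Pmode, adjust_address (m0, Pmode, 0));
ub0 = copy_to_mode_reg (Pmode, adjust_address (m0, Pmode, psize));

if (MEM_P (op1))
  m1 = op1;
else
  {
    m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
    emit_move_insn (m1, op1);
  }
lb1 = copy_to_mode_reg (Pmode, adjust_address (m1, Pmode, 0));
ub1 = copy_to_mode_reg (Pmode, adjust_address (m1, Pmode, psize));

res = assign_386_stack_local (BNDmode, SLOT_BND_STORED);
ix86_emit_move_max (lb0, lb1);
emit_move_insn (adjust_address (res, Pmode, 0), lb0);
/* UB halves are stored in 1's complement form, so max works here too.  */
ix86_emit_move_max (ub0, ub1);
emit_move_insn (adjust_address (res, Pmode, psize), ub0);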
Post by Ilya Enkovich
+ {
+ unsigned bndsize = GET_MODE_SIZE (BNDmode);
+ unsigned psize = GET_MODE_SIZE (Pmode);
+ rtx res, m1, m2, m1h1, m1h2, m2h1, m2h2, t1, t2, rh1, rh2;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ /* We need to move bounds to memory before any computations. */
+ if (!MEM_P (op0))
+ {
+ m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (m1, op0);
+ }
+ else
+ m1 = op0;
+
+ if (!MEM_P (op1))
+ {
+ m2 = assign_386_stack_local (BNDmode,
+ MEM_P (op0)
+ ? SLOT_TEMP
+ : SLOT_BND_STORED);
+ emit_move_insn (m2, op1);
+ }
+ else
+ m2 = op1;
+
+ if (!MEM_P (op0))
+ res = m1;
+ else if (!MEM_P (op1))
+ res = m2;
+ else
+ res = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ /* Generate mem expression to be used for access to LB and UB. */
+ m1h1 = gen_rtx_MEM (Pmode, XEXP (m1, 0));
+ m1h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m1, 0), psize));
+ m2h1 = gen_rtx_MEM (Pmode, XEXP (m2, 0));
+ m2h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m2, 0), psize));
+ rh1 = gen_rtx_MEM (Pmode, XEXP (res, 0));
+ rh2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (res, 0), psize));
Please use adjust_address here.
Post by Ilya Enkovich
+
+ /* Allocate temporaries. */
+ t1 = gen_reg_rtx (Pmode);
+ t2 = gen_reg_rtx (Pmode);
+
+ /* Compute LB. */
+ emit_move_insn (t1, m1h1);
+ emit_move_insn (t2, m2h1);
+ ix86_emit_move_max (t1, t2);
+ emit_move_insn (rh1, t1);
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ emit_move_insn (t1, m1h2);
+ emit_move_insn (t2, m2h2);
+ ix86_emit_move_max (t1, t2);
+ emit_move_insn (rh2, t1);
+
+ return res;
+ }
+
+ {
+ enum machine_mode mode = Pmode;
No need for the above temporary...
Post by Ilya Enkovich
+ tree name;
+ rtx temp;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == VAR_DECL);
+
+ name = DECL_ASSEMBLER_NAME (arg0);
+ temp = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
+ temp = gen_rtx_UNSPEC (mode, gen_rtvec (1, temp), UNSPEC_SIZEOF);
Call the expander directly, using target as operand 0. You won't need the move below.
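i.e., assuming the .md part of the series provides a named expander for
this unspec (the pattern name below is illustrative, not taken from this
patch), something like:

emit_insn (Pmode == SImode
           ? gen_move_size_reloc_si (target, temp)
           : gen_move_size_reloc_di (target, temp));

where temp is the SYMBOL_REF built above.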
Post by Ilya Enkovich
+ emit_move_insn (target, temp);
+
+ return target;
+ }
+
+ {
+ rtx mem, hmem;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (!MEM_P (op0))
+ {
+ mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (mem, op0);
+ }
+ else
+ mem = op0;
No negative conditions.
Post by Ilya Enkovich
+ /* Generate mem expression to access LB and load it. */
+ hmem = gen_rtx_MEM (Pmode, XEXP (mem, 0));
adjust_address again.
Post by Ilya Enkovich
+ emit_move_insn (target, hmem);
+
+ return target;
+ }
+
+ {
+ rtx mem, hmem;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (!MEM_P (op0))
+ {
+ mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (mem, op0);
+ }
+ else
+ mem = op0;
No negative conditions.
Post by Ilya Enkovich
+ /* Generate mem expression to access UB and load it. */
+ hmem = gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode, XEXP (mem, 0),
+ GEN_INT (GET_MODE_SIZE (Pmode))));
adjust_address again.
Post by Ilya Enkovich
+ emit_move_insn (target, hmem);
+
+ /* We need to invert all bits of UB. */
+ emit_move_insn (target, gen_rtx_NOT (Pmode, target));
Use expand_simple_unop here.
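e.g., with the helper already used elsewhere in this series:

target = expand_simple_unop (Pmode, NOT, target, target, 1);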
Post by Ilya Enkovich
+
+ return target;
+ }
+
case IX86_BUILTIN_MASKMOVQ:
case IX86_BUILTIN_MASKMOVDQU:
icode = (fcode == IX86_BUILTIN_MASKMOVQ
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index a38c5d1..ededa67 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2317,6 +2317,7 @@ enum ix86_stack_slot
SLOT_CW_FLOOR,
SLOT_CW_CEIL,
SLOT_CW_MASK_PM,
+ SLOT_BND_STORED,
MAX_386_STACK_LOCALS
};
Uros.
Ilya Enkovich
2014-09-18 13:47:34 UTC
Post by Uros Bizjak
Post by Ilya Enkovich
Post by Uros Bizjak
Post by Ilya Enkovich
Post by Uros Bizjak
I don't like the way arguments are prepared. For the case above,
bnd_ldx should have index_register_operand predicate in its pattern,
and this predicate (and its mode) should be checked in the expander
code. There are many examples of argument expansion in
ix86_expand_builtin function, including how Pmode is handled.
Also, please see how target is handled there. Target can be null, so
REG_P predicate will crash.
You should also select insn patterns depending on BNDmode, not TARGET_64BIT.
Please use assign_386_stack_local so stack slots can be shared.
SLOT_TEMP is intended for short-lived temporaries, you can introduce
new slots if you need more live values at once.
Uros.
Thanks for comments! Here is a new version in which I addressed all your concerns.
Unfortunately, it doesn't. The patch only fixed one instance w.r.t.
target handling, the one I referred to as an example. You still have
unchecked target, at least in IX86_BUILTIN_BNDMK.
However, you have general problems in your builtin expansion code,
if (optimize || !target
|| GET_MODE (target) != tmode
|| !register_operand(target, tmode))
target = gen_reg_rtx (tmode);
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
if (!register_operand (op0, Pmode))
op0 = ix86_zero_extend_to_Pmode (op0);
if (!register_operand (op1, SImode))
op1 = copy_to_mode_reg (SImode, op1);
if (!register_operand (op2, SImode))
op2 = copy_to_mode_reg (SImode, op2);
So, Pmode is handled in a special way, even when x32 is not considered.
BTW: I wonder if word_mode is needed here; Pmode can be SImode with an
address prefix (x32).
Inside the expanders, please use expand_simple_binop and expand_unop
on RTX, not tree expressions. Again, please see many examples.
Thank you for the additional explanations. Hope this time I answered your concerns correctly :)
Yes, this version is MUCH better. There are further comments below on the code.
Post by Ilya Enkovich
* config/i386/i386-builtin-types.def (BND): New.
(ULONG): New.
(BND_FTYPE_PCVOID_ULONG): New.
(VOID_FTYPE_BND_PCVOID): New.
(VOID_FTYPE_PCVOID_PCVOID_BND): New.
(BND_FTYPE_PCVOID_PCVOID): New.
(BND_FTYPE_PCVOID): New.
(BND_FTYPE_BND_BND): New.
(PVOID_FTYPE_PVOID_PVOID_ULONG): New.
(PVOID_FTYPE_PCVOID_BND_ULONG): New.
(ULONG_FTYPE_VOID): New.
(PVOID_FTYPE_BND): New.
* config/i386/i386.c: Include tree-chkp.h, rtl-chkp.h.
(ix86_builtins): Add
IX86_BUILTIN_BNDMK, IX86_BUILTIN_BNDSTX,
IX86_BUILTIN_BNDLDX, IX86_BUILTIN_BNDCL,
IX86_BUILTIN_BNDCU, IX86_BUILTIN_BNDRET,
IX86_BUILTIN_BNDNARROW, IX86_BUILTIN_BNDINT,
IX86_BUILTIN_SIZEOF, IX86_BUILTIN_BNDLOWER,
IX86_BUILTIN_BNDUPPER.
(builtin_isa): Add leaf_p and nothrow_p fields.
(def_builtin): Initialize leaf_p and nothrow_p.
(ix86_add_new_builtins): Handle leaf_p and nothrow_p
flags.
(bdesc_mpx): New.
(bdesc_mpx_const): New.
(ix86_init_mpx_builtins): New.
(ix86_init_builtins): Call ix86_init_mpx_builtins.
(ix86_emit_cmove): New.
(ix86_emit_move_max): New.
(ix86_expand_builtin): Expand IX86_BUILTIN_BNDMK,
IX86_BUILTIN_BNDSTX, IX86_BUILTIN_BNDLDX,
IX86_BUILTIN_BNDCL, IX86_BUILTIN_BNDCU,
IX86_BUILTIN_BNDRET, IX86_BUILTIN_BNDNARROW,
IX86_BUILTIN_BNDINT, IX86_BUILTIN_SIZEOF,
IX86_BUILTIN_BNDLOWER, IX86_BUILTIN_BNDUPPER.
* config/i386/i386.h (ix86_stack_slot): Added SLOT_BND_STORED.
..
Post by Ilya Enkovich
+ /* We need to move bounds to memory before any computations. */
+ if (!MEM_P (op1))
+ {
+ m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (m1, op1);
+ }
+ else
+ m1 = op1;
No negative conditions, please. Just swap the arms of the if statement. It
is much more readable.
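I.e. prefer:

if (MEM_P (op1))
  m1 = op1;
else
  {
    m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
    emit_move_insn (m1, op1);
  }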
Post by Ilya Enkovich
+
+ /* Generate mem expression to be used for access to LB and UB. */
+ m1h1 = gen_rtx_MEM (Pmode, XEXP (m1, 0));
+ m1h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m1, 0),
+ GET_MODE_SIZE (Pmode)));
Please use adjust_address instead of manually producing MEMs.
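E.g. the low and high halves of the bounds slot can be addressed as:

/* adjust_address keeps the MEM attributes consistent. */
m1h1 = adjust_address (m1, Pmode, 0);
m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));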
Post by Ilya Enkovich
+
+ t1 = gen_reg_rtx (Pmode);
+
+ /* Compute LB. */
+ emit_move_insn (t1, m1h1);
+ ix86_emit_move_max (t1, lb);
+ emit_move_insn (m1h1, t1);
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ emit_move_insn (t1, m1h2);
+ ix86_emit_move_max (t1, ub);
+ emit_move_insn (m1h2, t1);
+
+ op2 = gen_reg_rtx (BNDmode);
+ emit_move_insn (op2, m1);
+
+ return chkp_join_splitted_slot (lb, op2);
+ }
+
The handling in this builtin looks strange.
if (MEM_P (op0))
m0 = op0;
else
{
m0 = stack (SLOT_TEMP)
move (op0, m0)
}
move parts of m0 to temporaries.
if (MEM_P (op1))
m1 = op1;
else
{
m1 = stack (SLOT_TEMP)
move (op1, m1)
}
move parts of m1 to another temporaries.
Process temporaries.
res = stack (SLOT_BND_STORED)
move calculated stuff to res.
This will ensure that nobody will clobber your stack slot with the
result, assuming the consumer will use it soon. SLOT_TEMP is a short-lived
slot, and can be reused.
I see that returning the result in memory is not a good idea, since the result may be corrupted by subsequent builtin calls. And there is no need for a new slot if the result is returned in a register and only one slot is used at a time. Therefore I removed SLOT_BND_STORED.
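The reworked IX86_BUILTIN_BNDINT below therefore uses a single SLOT_TEMP and copies the result straight into a BNDmode register (sketch):

res = assign_386_stack_local (BNDmode, SLOT_TEMP);
/* ... compute the LB and UB halves of res via ix86_emit_move_max ... */
emit_move_insn (target, res);
return target;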
Post by Uros Bizjak
Post by Ilya Enkovich
+ case IX86_BUILTIN_BNDINT:
+ {
+ unsigned bndsize = GET_MODE_SIZE (BNDmode);
+ unsigned psize = GET_MODE_SIZE (Pmode);
+ rtx res, m1, m2, m1h1, m1h2, m2h1, m2h2, t1, t2, rh1, rh2;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ /* We need to move bounds to memory before any computations. */
+ if (!MEM_P (op0))
+ {
+ m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (m1, op0);
+ }
+ else
+ m1 = op0;
+
+ if (!MEM_P (op1))
+ {
+ m2 = assign_386_stack_local (BNDmode,
+ MEM_P (op0)
+ ? SLOT_TEMP
+ : SLOT_BND_STORED);
+ emit_move_insn (m2, op1);
+ }
+ else
+ m2 = op1;
+
+ if (!MEM_P (op0))
+ res = m1;
+ else if (!MEM_P (op1))
+ res = m2;
+ else
+ res = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ /* Generate mem expression to be used for access to LB and UB. */
+ m1h1 = gen_rtx_MEM (Pmode, XEXP (m1, 0));
+ m1h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m1, 0), psize));
+ m2h1 = gen_rtx_MEM (Pmode, XEXP (m2, 0));
+ m2h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m2, 0), psize));
+ rh1 = gen_rtx_MEM (Pmode, XEXP (res, 0));
+ rh2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (res, 0), psize));
Please use adjust_address here.
Post by Ilya Enkovich
+
+ /* Allocate temporaries. */
+ t1 = gen_reg_rtx (Pmode);
+ t2 = gen_reg_rtx (Pmode);
+
+ /* Compute LB. */
+ emit_move_insn (t1, m1h1);
+ emit_move_insn (t2, m2h1);
+ ix86_emit_move_max (t1, t2);
+ emit_move_insn (rh1, t1);
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ emit_move_insn (t1, m1h2);
+ emit_move_insn (t2, m2h2);
+ ix86_emit_move_max (t1, t2);
+ emit_move_insn (rh2, t1);
+
+ return res;
+ }
+
+ case IX86_BUILTIN_SIZEOF:
+ {
+ enum machine_mode mode = Pmode;
No need for the above temporary...
Post by Ilya Enkovich
+ tree name;
+ rtx temp;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == VAR_DECL);
+
+ name = DECL_ASSEMBLER_NAME (arg0);
+ temp = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
+ temp = gen_rtx_UNSPEC (mode, gen_rtvec (1, temp), UNSPEC_SIZEOF);
Call expander directly, use target as an operand 0. You won't need move below.
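I.e. something like:

emit_insn (Pmode == SImode
           ? gen_move_size_reloc_si (target, symbol)
           : gen_move_size_reloc_di (target, symbol));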
Post by Ilya Enkovich
+ emit_move_insn (target, temp);
+
+ return target;
+ }
+
+ case IX86_BUILTIN_BNDLOWER:
+ {
+ rtx mem, hmem;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (!MEM_P (op0))
+ {
+ mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (mem, op0);
+ }
+ else
+ mem = op0;
No negative conditions.
Post by Ilya Enkovich
+ /* Generate mem expression to access LB and load it. */
+ hmem = gen_rtx_MEM (Pmode, XEXP (mem, 0));
adjust_address again.
Post by Ilya Enkovich
+ emit_move_insn (target, hmem);
+
+ return target;
+ }
+
+ case IX86_BUILTIN_BNDUPPER:
+ {
+ rtx mem, hmem;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (!MEM_P (op0))
+ {
+ mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (mem, op0);
+ }
+ else
+ mem = op0;
No negative conditions.
Post by Ilya Enkovich
+ /* Generate mem expression to access UB and load it. */
+ hmem = gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode, XEXP (mem, 0),
+ GEN_INT (GET_MODE_SIZE (Pmode))));
adjust_address again.
Post by Ilya Enkovich
+ emit_move_insn (target, hmem);
+
+ /* We need to invert all bits of UB. */
+ emit_move_insn (target, gen_rtx_NOT (Pmode, target));
Use expand_simple_unop here.
Post by Ilya Enkovich
+
+ return target;
+ }
+
icode = (fcode == IX86_BUILTIN_MASKMOVQ
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index a38c5d1..ededa67 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2317,6 +2317,7 @@ enum ix86_stack_slot
SLOT_CW_FLOOR,
SLOT_CW_CEIL,
SLOT_CW_MASK_PM,
+ SLOT_BND_STORED,
MAX_386_STACK_LOCALS
};
Uros.
Thanks for your comments. Below is a fixed version.

Ilya
--
2014-09-17 Ilya Enkovich <***@intel.com>

* config/i386/i386-builtin-types.def (BND): New.
(ULONG): New.
(BND_FTYPE_PCVOID_ULONG): New.
(VOID_FTYPE_BND_PCVOID): New.
(VOID_FTYPE_PCVOID_PCVOID_BND): New.
(BND_FTYPE_PCVOID_PCVOID): New.
(BND_FTYPE_PCVOID): New.
(BND_FTYPE_BND_BND): New.
(PVOID_FTYPE_PVOID_PVOID_ULONG): New.
(PVOID_FTYPE_PCVOID_BND_ULONG): New.
(ULONG_FTYPE_VOID): New.
(PVOID_FTYPE_BND): New.
* config/i386/i386.c: Include tree-chkp.h, rtl-chkp.h.
(ix86_builtins): Add
IX86_BUILTIN_BNDMK, IX86_BUILTIN_BNDSTX,
IX86_BUILTIN_BNDLDX, IX86_BUILTIN_BNDCL,
IX86_BUILTIN_BNDCU, IX86_BUILTIN_BNDRET,
IX86_BUILTIN_BNDNARROW, IX86_BUILTIN_BNDINT,
IX86_BUILTIN_SIZEOF, IX86_BUILTIN_BNDLOWER,
IX86_BUILTIN_BNDUPPER.
(builtin_isa): Add leaf_p and nothrow_p fields.
(def_builtin): Initialize leaf_p and nothrow_p.
(ix86_add_new_builtins): Handle leaf_p and nothrow_p
flags.
(bdesc_mpx): New.
(bdesc_mpx_const): New.
(ix86_init_mpx_builtins): New.
(ix86_init_builtins): Call ix86_init_mpx_builtins.
(ix86_emit_cmove): New.
(ix86_emit_move_max): New.
(ix86_expand_builtin): Expand IX86_BUILTIN_BNDMK,
IX86_BUILTIN_BNDSTX, IX86_BUILTIN_BNDLDX,
IX86_BUILTIN_BNDCL, IX86_BUILTIN_BNDCU,
IX86_BUILTIN_BNDRET, IX86_BUILTIN_BNDNARROW,
IX86_BUILTIN_BNDINT, IX86_BUILTIN_SIZEOF,
IX86_BUILTIN_BNDLOWER, IX86_BUILTIN_BNDUPPER.


diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 35c0035..989297a 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -47,6 +47,7 @@ DEF_PRIMITIVE_TYPE (UCHAR, unsigned_char_type_node)
DEF_PRIMITIVE_TYPE (QI, char_type_node)
DEF_PRIMITIVE_TYPE (HI, intHI_type_node)
DEF_PRIMITIVE_TYPE (SI, intSI_type_node)
+DEF_PRIMITIVE_TYPE (BND, pointer_bounds_type_node)
# ??? Logically this should be intDI_type_node, but that maps to "long"
# with 64-bit, and that's not how the emmintrin.h is written. Again,
# changing this would change name mangling.
@@ -60,6 +61,7 @@ DEF_PRIMITIVE_TYPE (USHORT, short_unsigned_type_node)
DEF_PRIMITIVE_TYPE (INT, integer_type_node)
DEF_PRIMITIVE_TYPE (UINT, unsigned_type_node)
DEF_PRIMITIVE_TYPE (UNSIGNED, unsigned_type_node)
+DEF_PRIMITIVE_TYPE (ULONG, long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (LONGLONG, long_long_integer_type_node)
DEF_PRIMITIVE_TYPE (ULONGLONG, long_long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (UINT8, unsigned_char_type_node)
@@ -806,3 +808,15 @@ DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI, TF)
DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF_V4SF, TF)
DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SI_V4SI, TF)
DEF_FUNCTION_TYPE_ALIAS (V8HI_FTYPE_V8HI_V8HI, TF)
+
+# MPX builtins
+DEF_FUNCTION_TYPE (BND, PCVOID, ULONG)
+DEF_FUNCTION_TYPE (VOID, PCVOID, BND)
+DEF_FUNCTION_TYPE (VOID, PCVOID, BND, PCVOID)
+DEF_FUNCTION_TYPE (BND, PCVOID, PCVOID)
+DEF_FUNCTION_TYPE (BND, PCVOID)
+DEF_FUNCTION_TYPE (BND, BND, BND)
+DEF_FUNCTION_TYPE (PVOID, PVOID, PVOID, ULONG)
+DEF_FUNCTION_TYPE (PVOID, PCVOID, BND, ULONG)
+DEF_FUNCTION_TYPE (ULONG, VOID)
+DEF_FUNCTION_TYPE (PVOID, BND)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d0f58b1..6082f86 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -85,6 +85,8 @@ along with GCC; see the file COPYING3. If not see
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
+#include "tree-chkp.h"
+#include "rtl-chkp.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
@@ -28775,6 +28777,19 @@ enum ix86_builtins
IX86_BUILTIN_XABORT,
IX86_BUILTIN_XTEST,

+ /* MPX */
+ IX86_BUILTIN_BNDMK,
+ IX86_BUILTIN_BNDSTX,
+ IX86_BUILTIN_BNDLDX,
+ IX86_BUILTIN_BNDCL,
+ IX86_BUILTIN_BNDCU,
+ IX86_BUILTIN_BNDRET,
+ IX86_BUILTIN_BNDNARROW,
+ IX86_BUILTIN_BNDINT,
+ IX86_BUILTIN_SIZEOF,
+ IX86_BUILTIN_BNDLOWER,
+ IX86_BUILTIN_BNDUPPER,
+
/* BMI instructions. */
IX86_BUILTIN_BEXTR32,
IX86_BUILTIN_BEXTR64,
@@ -28848,6 +28863,8 @@ struct builtin_isa {
enum ix86_builtin_func_type tcode; /* type to use in the declaration */
HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
bool const_p; /* true if the declaration is constant */
+ bool leaf_p; /* true if the declaration has leaf attribute */
+ bool nothrow_p; /* true if the declaration has nothrow attribute */
bool set_and_not_built_p;
};

@@ -28899,6 +28916,8 @@ def_builtin (HOST_WIDE_INT mask, const char *name,
ix86_builtins[(int) code] = NULL_TREE;
ix86_builtins_isa[(int) code].tcode = tcode;
ix86_builtins_isa[(int) code].name = name;
+ ix86_builtins_isa[(int) code].leaf_p = false;
+ ix86_builtins_isa[(int) code].nothrow_p = false;
ix86_builtins_isa[(int) code].const_p = false;
ix86_builtins_isa[(int) code].set_and_not_built_p = true;
}
@@ -28949,6 +28968,11 @@ ix86_add_new_builtins (HOST_WIDE_INT isa)
ix86_builtins[i] = decl;
if (ix86_builtins_isa[i].const_p)
TREE_READONLY (decl) = 1;
+ if (ix86_builtins_isa[i].leaf_p)
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ if (ix86_builtins_isa[i].nothrow_p)
+ TREE_NOTHROW (decl) = 1;
}
}
}
@@ -30402,6 +30426,27 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
};

+/* Builtins for MPX. */
+static const struct builtin_description bdesc_mpx[] =
+{
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+};
+
+/* Const builtins for MPX. */
+static const struct builtin_description bdesc_mpx_const[] =
+{
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
+};
+
/* FMA4 and XOP. */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
@@ -31250,6 +31295,67 @@ ix86_init_mmx_sse_builtins (void)
}
}

+static void
+ix86_init_mpx_builtins ()
+{
+ const struct builtin_description * d;
+ enum ix86_builtin_func_type ftype;
+ tree decl;
+ size_t i;
+
+ for (i = 0, d = bdesc_mpx;
+ i < ARRAY_SIZE (bdesc_mpx);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ decl = def_builtin (d->mask, d->name, ftype, d->code);
+
+ /* Without leaf and nothrow flags for MPX builtins,
+ abnormal edges may follow their calls when setjmp
+ is present in the function. Since we may have a lot
+ of MPX builtin calls, this causes lots of useless
+ edges and enormous PHI nodes. To avoid this we mark
+ MPX builtins as leaf and nothrow. */
+ if (decl)
+ {
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+ else
+ {
+ ix86_builtins_isa[(int)d->code].leaf_p = true;
+ ix86_builtins_isa[(int)d->code].nothrow_p = true;
+ }
+ }
+
+ for (i = 0, d = bdesc_mpx_const;
+ i < ARRAY_SIZE (bdesc_mpx_const);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ decl = def_builtin_const (d->mask, d->name, ftype, d->code);
+
+ if (decl)
+ {
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+ else
+ {
+ ix86_builtins_isa[(int)d->code].leaf_p = true;
+ ix86_builtins_isa[(int)d->code].nothrow_p = true;
+ }
+ }
+}
+
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
to return a pointer to VERSION_DECL if the outcome of the expression
formed by PREDICATE_CHAIN is true. This function will be called during
@@ -32788,6 +32894,7 @@ ix86_init_builtins (void)

ix86_init_tm_builtins ();
ix86_init_mmx_sse_builtins ();
+ ix86_init_mpx_builtins ();

if (TARGET_LP64)
ix86_init_builtins_va_builtins_abi ();
@@ -35053,6 +35160,37 @@ ix86_expand_vec_set_builtin (tree exp)
return target;
}

+/* Emit conditional move of SRC to DST with condition
+ OP1 CODE OP2. */
+static void
+ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
+{
+ rtx t;
+
+ if (TARGET_CMOVE)
+ {
+ t = ix86_expand_compare (code, op1, op2);
+ emit_insn (gen_rtx_SET (VOIDmode, dst,
+ gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
+ src, dst)));
+ }
+ else
+ {
+ rtx nomove = gen_label_rtx ();
+ emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
+ const0_rtx, GET_MODE (op1), 1, nomove);
+ emit_move_insn (dst, src);
+ emit_label (nomove);
+ }
+}
+
+/* Choose the max of DST and SRC and put it in DST. */
+static void
+ix86_emit_move_max (rtx dst, rtx src)
+{
+ ix86_emit_cmove (dst, src, LTU, dst, src);
+}
+
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
(and in mode MODE if that's convenient).
@@ -35118,6 +35256,339 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,

switch (fcode)
{
+ case IX86_BUILTIN_BNDMK:
+ if (!target
+ || GET_MODE (target) != BNDmode
+ || !register_operand (target, BNDmode))
+ target = gen_reg_rtx (BNDmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, Pmode))
+ op1 = ix86_zero_extend_to_Pmode (op1);
+
+ /* Builtin arg1 is the size of the block, but instruction op1 should
+ be (size - 1). */
+ op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
+ op1, 1, OPTAB_DIRECT);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_mk (target, op0, op1)
+ : gen_bnd32_mk (target, op0, op1));
+ return target;
+
+ case IX86_BUILTIN_BNDSTX:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, BNDmode))
+ op1 = copy_to_mode_reg (BNDmode, op1);
+ if (!register_operand (op2, Pmode))
+ op2 = ix86_zero_extend_to_Pmode (op2);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_stx (op2, op0, op1)
+ : gen_bnd32_stx (op2, op0, op1));
+ return 0;
+
+ case IX86_BUILTIN_BNDLDX:
+ if (!target
+ || GET_MODE (target) != BNDmode
+ || !register_operand (target, BNDmode))
+ target = gen_reg_rtx (BNDmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, Pmode))
+ op1 = ix86_zero_extend_to_Pmode (op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_ldx (target, op0, op1)
+ : gen_bnd32_ldx (target, op0, op1));
+ return target;
+
+ case IX86_BUILTIN_BNDCL:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, BNDmode))
+ op1 = copy_to_mode_reg (BNDmode, op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_cl (op1, op0)
+ : gen_bnd32_cl (op1, op0));
+ return 0;
+
+ case IX86_BUILTIN_BNDCU:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, BNDmode))
+ op1 = copy_to_mode_reg (BNDmode, op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_cu (op1, op0)
+ : gen_bnd32_cu (op1, op0));
+ return 0;
+
+ case IX86_BUILTIN_BNDRET:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == SSA_NAME);
+ target = chkp_get_rtl_bounds (arg0);
+ /* If no bounds were specified for the returned value,
+ then use INIT bounds. This usually happens when
+ some built-in function is expanded. */
+ if (!target)
+ {
+ rtx t1 = gen_reg_rtx (Pmode);
+ rtx t2 = gen_reg_rtx (Pmode);
+ target = gen_reg_rtx (BNDmode);
+ emit_move_insn (t1, const0_rtx);
+ emit_move_insn (t2, constm1_rtx);
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_mk (target, t1, t2)
+ : gen_bnd32_mk (target, t1, t2));
+ }
+ gcc_assert (target && REG_P (target));
+ return target;
+
+ case IX86_BUILTIN_BNDNARROW:
+ {
+ rtx m1, m1h1, m1h2, lb, ub, t1;
+
+ /* Return value and lb. */
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ /* Bounds. */
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ /* Size. */
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ lb = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+
+ /* Size was passed but we need to use (size - 1) as for bndmk. */
+ op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
+ op2, 1, OPTAB_DIRECT);
+
+ /* Add LB to size and invert to get UB. */
+ op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
+ op2, 1, OPTAB_DIRECT);
+ ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
+
+ if (!register_operand (lb, Pmode))
+ lb = ix86_zero_extend_to_Pmode (lb);
+ if (!register_operand (op2, Pmode))
+ ub = ix86_zero_extend_to_Pmode (op2);
+
+ /* We need to move bounds to memory before any computations. */
+ if (MEM_P (op1))
+ m1 = op1;
+ else
+ {
+ m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (m1, op1);
+ }
+
+ /* Generate mem expression to be used for access to LB and UB. */
+ m1h1 = adjust_address (m1, Pmode, 0);
+ m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
+
+ t1 = gen_reg_rtx (Pmode);
+
+ /* Compute LB. */
+ emit_move_insn (t1, m1h1);
+ ix86_emit_move_max (t1, lb);
+ emit_move_insn (m1h1, t1);
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ emit_move_insn (t1, m1h2);
+ ix86_emit_move_max (t1, ub);
+ emit_move_insn (m1h2, t1);
+
+ op2 = gen_reg_rtx (BNDmode);
+ emit_move_insn (op2, m1);
+
+ return chkp_join_splitted_slot (lb, op2);
+ }
+
+ case IX86_BUILTIN_BNDINT:
+ {
+ rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
+
+ if (!target
+ || GET_MODE (target) != BNDmode
+ || !register_operand (target, BNDmode))
+ target = gen_reg_rtx (BNDmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ res = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ rh1 = adjust_address (res, Pmode, 0);
+ rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
+
+ /* Put first bounds to temporaries. */
+ lb1 = gen_reg_rtx (Pmode);
+ ub1 = gen_reg_rtx (Pmode);
+ if (MEM_P (op0))
+ {
+ emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
+ emit_move_insn (ub1, adjust_address (op0, Pmode,
+ GET_MODE_SIZE (Pmode)));
+ }
+ else
+ {
+ emit_move_insn (res, op0);
+ emit_move_insn (lb1, rh1);
+ emit_move_insn (ub1, rh2);
+ }
+
+ /* Put second bounds to temporaries. */
+ lb2 = gen_reg_rtx (Pmode);
+ ub2 = gen_reg_rtx (Pmode);
+ if (MEM_P (op1))
+ {
+ emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
+ emit_move_insn (ub2, adjust_address (op1, Pmode,
+ GET_MODE_SIZE (Pmode)));
+ }
+ else
+ {
+ emit_move_insn (res, op1);
+ emit_move_insn (lb2, rh1);
+ emit_move_insn (ub2, rh2);
+ }
+
+ /* Compute LB. */
+ ix86_emit_move_max (lb1, lb2);
+ emit_move_insn (rh1, lb1);
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ ix86_emit_move_max (ub1, ub2);
+ emit_move_insn (rh2, ub1);
+
+ emit_move_insn (target, res);
+
+ return target;
+ }
+
+ case IX86_BUILTIN_SIZEOF:
+ {
+ tree name;
+ rtx symbol;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == VAR_DECL);
+
+ name = DECL_ASSEMBLER_NAME (arg0);
+ symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
+
+ emit_insn (Pmode == SImode
+ ? gen_move_size_reloc_si (target, symbol)
+ : gen_move_size_reloc_di (target, symbol));
+
+ return target;
+ }
+
+ case IX86_BUILTIN_BNDLOWER:
+ {
+ rtx mem, hmem;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (MEM_P (op0))
+ mem = op0;
+ else
+ {
+ mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (mem, op0);
+ }
+
+ /* Generate mem expression to access LB and load it. */
+ hmem = adjust_address (mem, Pmode, 0);
+ emit_move_insn (target, hmem);
+
+ return target;
+ }
+
+ case IX86_BUILTIN_BNDUPPER:
+ {
+ rtx mem, hmem, res;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (MEM_P (op0))
+ mem = op0;
+ else
+ {
+ mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (mem, op0);
+ }
+
+ /* Generate mem expression to access UB. */
+ hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
+ /* We need to invert all bits of UB. */
+ res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
+ if (res != target)
+ emit_move_insn (target, res);
+
+ return target;
+ }
+
case IX86_BUILTIN_MASKMOVQ:
case IX86_BUILTIN_MASKMOVDQU:
icode = (fcode == IX86_BUILTIN_MASKMOVQ
Uros Bizjak
2014-09-18 17:33:51 UTC
Permalink
Post by Ilya Enkovich
Thanks for your comments. Below is a fixed version.
Ilya
--
* config/i386/i386-builtin-types.def (BND): New.
(ULONG): New.
(BND_FTYPE_PCVOID_ULONG): New.
(VOID_FTYPE_BND_PCVOID): New.
(VOID_FTYPE_PCVOID_PCVOID_BND): New.
(BND_FTYPE_PCVOID_PCVOID): New.
(BND_FTYPE_PCVOID): New.
(BND_FTYPE_BND_BND): New.
(PVOID_FTYPE_PVOID_PVOID_ULONG): New.
(PVOID_FTYPE_PCVOID_BND_ULONG): New.
(ULONG_FTYPE_VOID): New.
(PVOID_FTYPE_BND): New.
* config/i386/i386.c: Include tree-chkp.h, rtl-chkp.h.
(ix86_builtins): Add
IX86_BUILTIN_BNDMK, IX86_BUILTIN_BNDSTX,
IX86_BUILTIN_BNDLDX, IX86_BUILTIN_BNDCL,
IX86_BUILTIN_BNDCU, IX86_BUILTIN_BNDRET,
IX86_BUILTIN_BNDNARROW, IX86_BUILTIN_BNDINT,
IX86_BUILTIN_SIZEOF, IX86_BUILTIN_BNDLOWER,
IX86_BUILTIN_BNDUPPER.
(builtin_isa): Add leaf_p and nothrow_p fields.
(def_builtin): Initialize leaf_p and nothrow_p.
(ix86_add_new_builtins): Handle leaf_p and nothrow_p
flags.
(bdesc_mpx): New.
(bdesc_mpx_const): New.
(ix86_init_mpx_builtins): New.
(ix86_init_builtins): Call ix86_init_mpx_builtins.
(ix86_emit_cmove): New.
(ix86_emit_move_max): New.
(ix86_expand_builtin): Expand IX86_BUILTIN_BNDMK,
IX86_BUILTIN_BNDSTX, IX86_BUILTIN_BNDLDX,
IX86_BUILTIN_BNDCL, IX86_BUILTIN_BNDCU,
IX86_BUILTIN_BNDRET, IX86_BUILTIN_BNDNARROW,
IX86_BUILTIN_BNDINT, IX86_BUILTIN_SIZEOF,
IX86_BUILTIN_BNDLOWER, IX86_BUILTIN_BNDUPPER.
OK with a few nits below.

Thanks,
Uros.
Post by Ilya Enkovich
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 35c0035..989297a 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -47,6 +47,7 @@ DEF_PRIMITIVE_TYPE (UCHAR, unsigned_char_type_node)
DEF_PRIMITIVE_TYPE (QI, char_type_node)
DEF_PRIMITIVE_TYPE (HI, intHI_type_node)
DEF_PRIMITIVE_TYPE (SI, intSI_type_node)
+DEF_PRIMITIVE_TYPE (BND, pointer_bounds_type_node)
# ??? Logically this should be intDI_type_node, but that maps to "long"
# with 64-bit, and that's not how the emmintrin.h is written. Again,
# changing this would change name mangling.
@@ -60,6 +61,7 @@ DEF_PRIMITIVE_TYPE (USHORT, short_unsigned_type_node)
DEF_PRIMITIVE_TYPE (INT, integer_type_node)
DEF_PRIMITIVE_TYPE (UINT, unsigned_type_node)
DEF_PRIMITIVE_TYPE (UNSIGNED, unsigned_type_node)
+DEF_PRIMITIVE_TYPE (ULONG, long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (LONGLONG, long_long_integer_type_node)
DEF_PRIMITIVE_TYPE (ULONGLONG, long_long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (UINT8, unsigned_char_type_node)
@@ -806,3 +808,15 @@ DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI, TF)
DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF_V4SF, TF)
DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SI_V4SI, TF)
DEF_FUNCTION_TYPE_ALIAS (V8HI_FTYPE_V8HI_V8HI, TF)
+
+# MPX builtins
+DEF_FUNCTION_TYPE (BND, PCVOID, ULONG)
+DEF_FUNCTION_TYPE (VOID, PCVOID, BND)
+DEF_FUNCTION_TYPE (VOID, PCVOID, BND, PCVOID)
+DEF_FUNCTION_TYPE (BND, PCVOID, PCVOID)
+DEF_FUNCTION_TYPE (BND, PCVOID)
+DEF_FUNCTION_TYPE (BND, BND, BND)
+DEF_FUNCTION_TYPE (PVOID, PVOID, PVOID, ULONG)
+DEF_FUNCTION_TYPE (PVOID, PCVOID, BND, ULONG)
+DEF_FUNCTION_TYPE (ULONG, VOID)
+DEF_FUNCTION_TYPE (PVOID, BND)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d0f58b1..6082f86 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -85,6 +85,8 @@ along with GCC; see the file COPYING3. If not see
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
+#include "tree-chkp.h"
+#include "rtl-chkp.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
@@ -28775,6 +28777,19 @@ enum ix86_builtins
IX86_BUILTIN_XABORT,
IX86_BUILTIN_XTEST,
+ /* MPX */
+ IX86_BUILTIN_BNDMK,
+ IX86_BUILTIN_BNDSTX,
+ IX86_BUILTIN_BNDLDX,
+ IX86_BUILTIN_BNDCL,
+ IX86_BUILTIN_BNDCU,
+ IX86_BUILTIN_BNDRET,
+ IX86_BUILTIN_BNDNARROW,
+ IX86_BUILTIN_BNDINT,
+ IX86_BUILTIN_SIZEOF,
+ IX86_BUILTIN_BNDLOWER,
+ IX86_BUILTIN_BNDUPPER,
+
/* BMI instructions. */
IX86_BUILTIN_BEXTR32,
IX86_BUILTIN_BEXTR64,
@@ -28848,6 +28863,8 @@ struct builtin_isa {
enum ix86_builtin_func_type tcode; /* type to use in the declaration */
HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
bool const_p; /* true if the declaration is constant */
+ bool leaf_p; /* true if the declaration has leaf attribute */
+ bool nothrow_p; /* true if the declaration has nothrow attribute */
bool set_and_not_built_p;
};
@@ -28899,6 +28916,8 @@ def_builtin (HOST_WIDE_INT mask, const char *name,
ix86_builtins[(int) code] = NULL_TREE;
ix86_builtins_isa[(int) code].tcode = tcode;
ix86_builtins_isa[(int) code].name = name;
+ ix86_builtins_isa[(int) code].leaf_p = false;
+ ix86_builtins_isa[(int) code].nothrow_p = false;
ix86_builtins_isa[(int) code].const_p = false;
ix86_builtins_isa[(int) code].set_and_not_built_p = true;
}
@@ -28949,6 +28968,11 @@ ix86_add_new_builtins (HOST_WIDE_INT isa)
ix86_builtins[i] = decl;
if (ix86_builtins_isa[i].const_p)
TREE_READONLY (decl) = 1;
+ if (ix86_builtins_isa[i].leaf_p)
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ if (ix86_builtins_isa[i].nothrow_p)
+ TREE_NOTHROW (decl) = 1;
}
}
}
@@ -30402,6 +30426,27 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
};
+/* Builtins for MPX. */
+static const struct builtin_description bdesc_mpx[] =
+{
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+};
+
+/* Const builtins for MPX. */
+static const struct builtin_description bdesc_mpx_const[] =
+{
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
+};
+
/* FMA4 and XOP. */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
@@ -31250,6 +31295,67 @@ ix86_init_mmx_sse_builtins (void)
}
}
+static void
+ix86_init_mpx_builtins ()
+{
+ const struct builtin_description * d;
+ enum ix86_builtin_func_type ftype;
+ tree decl;
+ size_t i;
+
+ for (i = 0, d = bdesc_mpx;
+ i < ARRAY_SIZE (bdesc_mpx);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ decl = def_builtin (d->mask, d->name, ftype, d->code);
+
+ /* Without leaf and nothrow flags for MPX builtins,
+ abnormal edges may follow their calls when setjmp
+ is present in the function. Since we may have a lot
+ of MPX builtin calls, this causes lots of useless
+ edges and enormous PHI nodes. To avoid this we mark
+ MPX builtins as leaf and nothrow. */
+ if (decl)
+ {
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+ else
+ {
+ ix86_builtins_isa[(int)d->code].leaf_p = true;
+ ix86_builtins_isa[(int)d->code].nothrow_p = true;
+ }
+ }
+
+ for (i = 0, d = bdesc_mpx_const;
+ i < ARRAY_SIZE (bdesc_mpx_const);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ decl = def_builtin_const (d->mask, d->name, ftype, d->code);
+
+ if (decl)
+ {
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+ else
+ {
+ ix86_builtins_isa[(int)d->code].leaf_p = true;
+ ix86_builtins_isa[(int)d->code].nothrow_p = true;
+ }
+ }
+}
+
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
to return a pointer to VERSION_DECL if the outcome of the expression
formed by PREDICATE_CHAIN is true. This function will be called during
@@ -32788,6 +32894,7 @@ ix86_init_builtins (void)
ix86_init_tm_builtins ();
ix86_init_mmx_sse_builtins ();
+ ix86_init_mpx_builtins ();
if (TARGET_LP64)
ix86_init_builtins_va_builtins_abi ();
@@ -35053,6 +35160,37 @@ ix86_expand_vec_set_builtin (tree exp)
return target;
}
+/* Emit conditional move of SRC to DST with condition
+ OP1 CODE OP2. */
+static void
+ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
+{
+ rtx t;
+
+ if (TARGET_CMOVE)
+ {
+ t = ix86_expand_compare (code, op1, op2);
+ emit_insn (gen_rtx_SET (VOIDmode, dst,
+ gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
+ src, dst)));
+ }
+ else
+ {
+ rtx nomove = gen_label_rtx ();
+ emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
+ const0_rtx, GET_MODE (op1), 1, nomove);
+ emit_move_insn (dst, src);
+ emit_label (nomove);
+ }
+}
+
+/* Choose the max of DST and SRC and put it in DST. */
+static void
+ix86_emit_move_max (rtx dst, rtx src)
+{
+ ix86_emit_cmove (dst, src, LTU, dst, src);
+}
+
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
(and in mode MODE if that's convenient).
@@ -35118,6 +35256,339 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
switch (fcode)
{
+ case IX86_BUILTIN_BNDMK:
+ if (!target
+ || GET_MODE (target) != BNDmode
+ || !register_operand (target, BNDmode))
+ target = gen_reg_rtx (BNDmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, Pmode))
+ op1 = ix86_zero_extend_to_Pmode (op1);
+
+ /* Builtin arg1 is the size of the block, but instruction op1 should
+ be (size - 1). */
+ op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
+ op1, 1, OPTAB_DIRECT);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_mk (target, op0, op1)
+ : gen_bnd32_mk (target, op0, op1));
+ return target;
+
+ case IX86_BUILTIN_BNDSTX:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, BNDmode))
+ op1 = copy_to_mode_reg (BNDmode, op1);
+ if (!register_operand (op2, Pmode))
+ op2 = ix86_zero_extend_to_Pmode (op2);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_stx (op2, op0, op1)
+ : gen_bnd32_stx (op2, op0, op1));
+ return 0;
+
+ case IX86_BUILTIN_BNDLDX:
+ if (!target
+ || GET_MODE (target) != BNDmode
+ || !register_operand (target, BNDmode))
+ target = gen_reg_rtx (BNDmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, Pmode))
+ op1 = ix86_zero_extend_to_Pmode (op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_ldx (target, op0, op1)
+ : gen_bnd32_ldx (target, op0, op1));
+ return target;
+
+ case IX86_BUILTIN_BNDCL:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, BNDmode))
+ op1 = copy_to_mode_reg (BNDmode, op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_cl (op1, op0)
+ : gen_bnd32_cl (op1, op0));
+ return 0;
+
+ case IX86_BUILTIN_BNDCU:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, BNDmode))
+ op1 = copy_to_mode_reg (BNDmode, op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_cu (op1, op0)
+ : gen_bnd32_cu (op1, op0));
+ return 0;
+
+ case IX86_BUILTIN_BNDRET:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == SSA_NAME);
+ target = chkp_get_rtl_bounds (arg0);
Please add vertical space here ...
Post by Ilya Enkovich
+ /* If no bounds were specified for the returned value,
+ then use INIT bounds. This usually happens when
+ some built-in function is expanded. */
+ if (!target)
+ {
+ rtx t1 = gen_reg_rtx (Pmode);
+ rtx t2 = gen_reg_rtx (Pmode);
+ target = gen_reg_rtx (BNDmode);
+ emit_move_insn (t1, const0_rtx);
+ emit_move_insn (t2, constm1_rtx);
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_mk (target, t1, t2)
+ : gen_bnd32_mk (target, t1, t2));
+ }
... and here.
Post by Ilya Enkovich
+ gcc_assert (target && REG_P (target));
+ return target;
+
+ case IX86_BUILTIN_BNDNARROW:
+ {
+ rtx m1, m1h1, m1h2, lb, ub, t1;
+
+ /* Return value and lb. */
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ /* Bounds. */
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ /* Size. */
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ lb = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+
+ /* Size was passed but we need to use (size - 1) as for bndmk. */
+ op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
+ op2, 1, OPTAB_DIRECT);
+
+ /* Add LB to size and invert to get UB. */
+ op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
+ op2, 1, OPTAB_DIRECT);
+ ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
+
+ if (!register_operand (lb, Pmode))
+ lb = ix86_zero_extend_to_Pmode (lb);
+ if (!register_operand (op2, Pmode))
+ ub = ix86_zero_extend_to_Pmode (op2);
+
+ /* We need to move bounds to memory before any computations. */
+ if (MEM_P (op1))
+ m1 = op1;
+ else
+ {
+ m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (m1, op1);
+ }
+
+ /* Generate mem expression to be used for access to LB and UB. */
+ m1h1 = adjust_address (m1, Pmode, 0);
+ m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
+
+ t1 = gen_reg_rtx (Pmode);
+
+ /* Compute LB. */
+ emit_move_insn (t1, m1h1);
+ ix86_emit_move_max (t1, lb);
+ emit_move_insn (m1h1, t1);
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ emit_move_insn (t1, m1h2);
+ ix86_emit_move_max (t1, ub);
+ emit_move_insn (m1h2, t1);
+
+ op2 = gen_reg_rtx (BNDmode);
+ emit_move_insn (op2, m1);
+
+ return chkp_join_splitted_slot (lb, op2);
+ }
+
+ case IX86_BUILTIN_BNDINT:
+ {
+ rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
+
+ if (!target
+ || GET_MODE (target) != BNDmode
+ || !register_operand (target, BNDmode))
+ target = gen_reg_rtx (BNDmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ res = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ rh1 = adjust_address (res, Pmode, 0);
+ rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
+
+ /* Put first bounds to temporaries. */
+ lb1 = gen_reg_rtx (Pmode);
+ ub1 = gen_reg_rtx (Pmode);
+ if (MEM_P (op0))
+ {
+ emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
+ emit_move_insn (ub1, adjust_address (op0, Pmode,
+ GET_MODE_SIZE (Pmode)));
+ }
+ else
+ {
+ emit_move_insn (res, op0);
+ emit_move_insn (lb1, rh1);
+ emit_move_insn (ub1, rh2);
+ }
+
+ /* Put second bounds to temporaries. */
+ lb2 = gen_reg_rtx (Pmode);
+ ub2 = gen_reg_rtx (Pmode);
+ if (MEM_P (op1))
+ {
+ emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
+ emit_move_insn (ub2, adjust_address (op1, Pmode,
+ GET_MODE_SIZE (Pmode)));
+ }
+ else
+ {
+ emit_move_insn (res, op1);
+ emit_move_insn (lb2, rh1);
+ emit_move_insn (ub2, rh2);
+ }
+
+ /* Compute LB. */
+ ix86_emit_move_max (lb1, lb2);
+ emit_move_insn (rh1, lb1);
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ ix86_emit_move_max (ub1, ub2);
+ emit_move_insn (rh2, ub1);
+
+ emit_move_insn (target, res);
+
+ return target;
+ }
+
+ case IX86_BUILTIN_SIZEOF:
+ {
+ tree name;
+ rtx symbol;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == VAR_DECL);
+
+ name = DECL_ASSEMBLER_NAME (arg0);
+ symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
+
+ emit_insn (Pmode == SImode
+ ? gen_move_size_reloc_si (target, symbol)
+ : gen_move_size_reloc_di (target, symbol));
+
+ return target;
+ }
+
+ case IX86_BUILTIN_BNDLOWER:
+ {
+ rtx mem, hmem;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (MEM_P (op0))
+ mem = op0;
+ else
+ {
+ mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (mem, op0);
+ }
+
+ /* Generate mem expression to access LB and load it. */
+ hmem = adjust_address (mem, Pmode, 0);
+ emit_move_insn (target, hmem);
+
+ return target;
+ }
+
+ case IX86_BUILTIN_BNDUPPER:
+ {
+ rtx mem, hmem, res;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (MEM_P (op0))
+ mem = op0;
+ else
+ {
+ mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (mem, op0);
+ }
+
+ /* Generate mem expression to access UB. */
+ hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
Vertical space here ...
Post by Ilya Enkovich
+ /* We need to invert all bits of UB. */
+ res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
... and here.
Post by Ilya Enkovich
+ if (res != target)
+ emit_move_insn (target, res);
+
+ return target;
+ }
+
icode = (fcode == IX86_BUILTIN_MASKMOVQ
Ilya Enkovich
2014-09-19 07:25:27 UTC
Permalink
Post by Uros Bizjak
Post by Ilya Enkovich
Thanks for your comments. Below is a fixed verison.
Ilya
--
OK with a few nits below.
Thanks,
Uros.
Post by Ilya Enkovich
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == SSA_NAME);
+ target = chkp_get_rtl_bounds (arg0);
Please add vertical space here ...
Post by Ilya Enkovich
+ /* If no bounds were specified for the returned value,
+ then use INIT bounds. This usually happens when
+ some built-in function is expanded. */
+ if (!target)
+ {
+ rtx t1 = gen_reg_rtx (Pmode);
+ rtx t2 = gen_reg_rtx (Pmode);
+ target = gen_reg_rtx (BNDmode);
+ emit_move_insn (t1, const0_rtx);
+ emit_move_insn (t2, constm1_rtx);
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_mk (target, t1, t2)
+ : gen_bnd32_mk (target, t1, t2));
+ }
... and here.
Post by Ilya Enkovich
+ gcc_assert (target && REG_P (target));
+ return target;
+
+
+ /* Generate mem expression to access UB. */
+ hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
Vertical space here ...
Post by Ilya Enkovich
+ /* We need to invert all bits of UB. */
+ res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
... and here.
Post by Ilya Enkovich
+ if (res != target)
+ emit_move_insn (target, res);
+
Thank you for the detailed review of this patch. Here is the final version.

Ilya
--
2014-09-19 Ilya Enkovich <***@intel.com>

* config/i386/i386-builtin-types.def (BND): New.
(ULONG): New.
(BND_FTYPE_PCVOID_ULONG): New.
(VOID_FTYPE_BND_PCVOID): New.
(VOID_FTYPE_PCVOID_PCVOID_BND): New.
(BND_FTYPE_PCVOID_PCVOID): New.
(BND_FTYPE_PCVOID): New.
(BND_FTYPE_BND_BND): New.
(PVOID_FTYPE_PVOID_PVOID_ULONG): New.
(PVOID_FTYPE_PCVOID_BND_ULONG): New.
(ULONG_FTYPE_VOID): New.
(PVOID_FTYPE_BND): New.
* config/i386/i386.c: Include tree-chkp.h, rtl-chkp.h.
(ix86_builtins): Add
IX86_BUILTIN_BNDMK, IX86_BUILTIN_BNDSTX,
IX86_BUILTIN_BNDLDX, IX86_BUILTIN_BNDCL,
IX86_BUILTIN_BNDCU, IX86_BUILTIN_BNDRET,
IX86_BUILTIN_BNDNARROW, IX86_BUILTIN_BNDINT,
IX86_BUILTIN_SIZEOF, IX86_BUILTIN_BNDLOWER,
IX86_BUILTIN_BNDUPPER.
(builtin_isa): Add leaf_p and nothrow_p fields.
(def_builtin): Initialize leaf_p and nothrow_p.
(ix86_add_new_builtins): Handle leaf_p and nothrow_p
flags.
(bdesc_mpx): New.
(bdesc_mpx_const): New.
(ix86_init_mpx_builtins): New.
(ix86_init_builtins): Call ix86_init_mpx_builtins.
(ix86_emit_cmove): New.
(ix86_emit_move_max): New.
(ix86_expand_builtin): Expand IX86_BUILTIN_BNDMK,
IX86_BUILTIN_BNDSTX, IX86_BUILTIN_BNDLDX,
IX86_BUILTIN_BNDCL, IX86_BUILTIN_BNDCU,
IX86_BUILTIN_BNDRET, IX86_BUILTIN_BNDNARROW,
IX86_BUILTIN_BNDINT, IX86_BUILTIN_SIZEOF,
IX86_BUILTIN_BNDLOWER, IX86_BUILTIN_BNDUPPER.


diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 35c0035..989297a 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -47,6 +47,7 @@ DEF_PRIMITIVE_TYPE (UCHAR, unsigned_char_type_node)
DEF_PRIMITIVE_TYPE (QI, char_type_node)
DEF_PRIMITIVE_TYPE (HI, intHI_type_node)
DEF_PRIMITIVE_TYPE (SI, intSI_type_node)
+DEF_PRIMITIVE_TYPE (BND, pointer_bounds_type_node)
# ??? Logically this should be intDI_type_node, but that maps to "long"
# with 64-bit, and that's not how the emmintrin.h is written. Again,
# changing this would change name mangling.
@@ -60,6 +61,7 @@ DEF_PRIMITIVE_TYPE (USHORT, short_unsigned_type_node)
DEF_PRIMITIVE_TYPE (INT, integer_type_node)
DEF_PRIMITIVE_TYPE (UINT, unsigned_type_node)
DEF_PRIMITIVE_TYPE (UNSIGNED, unsigned_type_node)
+DEF_PRIMITIVE_TYPE (ULONG, long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (LONGLONG, long_long_integer_type_node)
DEF_PRIMITIVE_TYPE (ULONGLONG, long_long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (UINT8, unsigned_char_type_node)
@@ -806,3 +808,15 @@ DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI, TF)
DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF_V4SF, TF)
DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SI_V4SI, TF)
DEF_FUNCTION_TYPE_ALIAS (V8HI_FTYPE_V8HI_V8HI, TF)
+
+# MPX builtins
+DEF_FUNCTION_TYPE (BND, PCVOID, ULONG)
+DEF_FUNCTION_TYPE (VOID, PCVOID, BND)
+DEF_FUNCTION_TYPE (VOID, PCVOID, BND, PCVOID)
+DEF_FUNCTION_TYPE (BND, PCVOID, PCVOID)
+DEF_FUNCTION_TYPE (BND, PCVOID)
+DEF_FUNCTION_TYPE (BND, BND, BND)
+DEF_FUNCTION_TYPE (PVOID, PVOID, PVOID, ULONG)
+DEF_FUNCTION_TYPE (PVOID, PCVOID, BND, ULONG)
+DEF_FUNCTION_TYPE (ULONG, VOID)
+DEF_FUNCTION_TYPE (PVOID, BND)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d0f58b1..d493983 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -85,6 +85,8 @@ along with GCC; see the file COPYING3. If not see
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
+#include "tree-chkp.h"
+#include "rtl-chkp.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
@@ -28775,6 +28777,19 @@ enum ix86_builtins
IX86_BUILTIN_XABORT,
IX86_BUILTIN_XTEST,

+ /* MPX */
+ IX86_BUILTIN_BNDMK,
+ IX86_BUILTIN_BNDSTX,
+ IX86_BUILTIN_BNDLDX,
+ IX86_BUILTIN_BNDCL,
+ IX86_BUILTIN_BNDCU,
+ IX86_BUILTIN_BNDRET,
+ IX86_BUILTIN_BNDNARROW,
+ IX86_BUILTIN_BNDINT,
+ IX86_BUILTIN_SIZEOF,
+ IX86_BUILTIN_BNDLOWER,
+ IX86_BUILTIN_BNDUPPER,
+
/* BMI instructions. */
IX86_BUILTIN_BEXTR32,
IX86_BUILTIN_BEXTR64,
@@ -28848,6 +28863,8 @@ struct builtin_isa {
enum ix86_builtin_func_type tcode; /* type to use in the declaration */
HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
bool const_p; /* true if the declaration is constant */
+ bool leaf_p; /* true if the declaration has leaf attribute */
+ bool nothrow_p; /* true if the declaration has nothrow attribute */
bool set_and_not_built_p;
};

@@ -28899,6 +28916,8 @@ def_builtin (HOST_WIDE_INT mask, const char *name,
ix86_builtins[(int) code] = NULL_TREE;
ix86_builtins_isa[(int) code].tcode = tcode;
ix86_builtins_isa[(int) code].name = name;
+ ix86_builtins_isa[(int) code].leaf_p = false;
+ ix86_builtins_isa[(int) code].nothrow_p = false;
ix86_builtins_isa[(int) code].const_p = false;
ix86_builtins_isa[(int) code].set_and_not_built_p = true;
}
@@ -28949,6 +28968,11 @@ ix86_add_new_builtins (HOST_WIDE_INT isa)
ix86_builtins[i] = decl;
if (ix86_builtins_isa[i].const_p)
TREE_READONLY (decl) = 1;
+ if (ix86_builtins_isa[i].leaf_p)
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ if (ix86_builtins_isa[i].nothrow_p)
+ TREE_NOTHROW (decl) = 1;
}
}
}
@@ -30402,6 +30426,27 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
};

+/* Builtins for MPX. */
+static const struct builtin_description bdesc_mpx[] =
+{
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+};
+
+/* Const builtins for MPX. */
+static const struct builtin_description bdesc_mpx_const[] =
+{
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
+};
+
/* FMA4 and XOP. */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
@@ -31250,6 +31295,67 @@ ix86_init_mmx_sse_builtins (void)
}
}

+static void
+ix86_init_mpx_builtins ()
+{
+ const struct builtin_description * d;
+ enum ix86_builtin_func_type ftype;
+ tree decl;
+ size_t i;
+
+ for (i = 0, d = bdesc_mpx;
+ i < ARRAY_SIZE (bdesc_mpx);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ decl = def_builtin (d->mask, d->name, ftype, d->code);
+
+ /* Without leaf and nothrow flags for MPX builtins,
+ abnormal edges may follow their calls when setjmp
+ is present in the function. Since we may have a lot
+ of MPX builtin calls, this causes lots of useless
+ edges and enormous PHI nodes. To avoid this we mark
+ MPX builtins as leaf and nothrow. */
+ if (decl)
+ {
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+ else
+ {
+ ix86_builtins_isa[(int)d->code].leaf_p = true;
+ ix86_builtins_isa[(int)d->code].nothrow_p = true;
+ }
+ }
+
+ for (i = 0, d = bdesc_mpx_const;
+ i < ARRAY_SIZE (bdesc_mpx_const);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ decl = def_builtin_const (d->mask, d->name, ftype, d->code);
+
+ if (decl)
+ {
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+ else
+ {
+ ix86_builtins_isa[(int)d->code].leaf_p = true;
+ ix86_builtins_isa[(int)d->code].nothrow_p = true;
+ }
+ }
+}
+
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
to return a pointer to VERSION_DECL if the outcome of the expression
formed by PREDICATE_CHAIN is true. This function will be called during
@@ -32788,6 +32894,7 @@ ix86_init_builtins (void)

ix86_init_tm_builtins ();
ix86_init_mmx_sse_builtins ();
+ ix86_init_mpx_builtins ();

if (TARGET_LP64)
ix86_init_builtins_va_builtins_abi ();
@@ -35053,6 +35160,37 @@ ix86_expand_vec_set_builtin (tree exp)
return target;
}

+/* Emit conditional move of SRC to DST with condition
+ OP1 CODE OP2. */
+static void
+ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
+{
+ rtx t;
+
+ if (TARGET_CMOVE)
+ {
+ t = ix86_expand_compare (code, op1, op2);
+ emit_insn (gen_rtx_SET (VOIDmode, dst,
+ gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
+ src, dst)));
+ }
+ else
+ {
+ rtx nomove = gen_label_rtx ();
+ emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
+ const0_rtx, GET_MODE (op1), 1, nomove);
+ emit_move_insn (dst, src);
+ emit_label (nomove);
+ }
+}
+
+/* Choose the max of DST and SRC and put it in DST. */
+static void
+ix86_emit_move_max (rtx dst, rtx src)
+{
+ ix86_emit_cmove (dst, src, LTU, dst, src);
+}
+
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
(and in mode MODE if that's convenient).
@@ -35118,6 +35256,343 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,

switch (fcode)
{
+ case IX86_BUILTIN_BNDMK:
+ if (!target
+ || GET_MODE (target) != BNDmode
+ || !register_operand (target, BNDmode))
+ target = gen_reg_rtx (BNDmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, Pmode))
+ op1 = ix86_zero_extend_to_Pmode (op1);
+
+ /* Builtin arg1 is the size of the block, but instruction op1 should
+ be (size - 1). */
+ op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
+ op1, 1, OPTAB_DIRECT);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_mk (target, op0, op1)
+ : gen_bnd32_mk (target, op0, op1));
+ return target;
+
+ case IX86_BUILTIN_BNDSTX:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, BNDmode))
+ op1 = copy_to_mode_reg (BNDmode, op1);
+ if (!register_operand (op2, Pmode))
+ op2 = ix86_zero_extend_to_Pmode (op2);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_stx (op2, op0, op1)
+ : gen_bnd32_stx (op2, op0, op1));
+ return 0;
+
+ case IX86_BUILTIN_BNDLDX:
+ if (!target
+ || GET_MODE (target) != BNDmode
+ || !register_operand (target, BNDmode))
+ target = gen_reg_rtx (BNDmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, Pmode))
+ op1 = ix86_zero_extend_to_Pmode (op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_ldx (target, op0, op1)
+ : gen_bnd32_ldx (target, op0, op1));
+ return target;
+
+ case IX86_BUILTIN_BNDCL:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, BNDmode))
+ op1 = copy_to_mode_reg (BNDmode, op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_cl (op1, op0)
+ : gen_bnd32_cl (op1, op0));
+ return 0;
+
+ case IX86_BUILTIN_BNDCU:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, BNDmode))
+ op1 = copy_to_mode_reg (BNDmode, op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_cu (op1, op0)
+ : gen_bnd32_cu (op1, op0));
+ return 0;
+
+ case IX86_BUILTIN_BNDRET:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == SSA_NAME);
+ target = chkp_get_rtl_bounds (arg0);
+
+ /* If no bounds were specified for the returned value,
+ then use INIT bounds. This usually happens when
+ some built-in function is expanded. */
+ if (!target)
+ {
+ rtx t1 = gen_reg_rtx (Pmode);
+ rtx t2 = gen_reg_rtx (Pmode);
+ target = gen_reg_rtx (BNDmode);
+ emit_move_insn (t1, const0_rtx);
+ emit_move_insn (t2, constm1_rtx);
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_mk (target, t1, t2)
+ : gen_bnd32_mk (target, t1, t2));
+ }
+
+ gcc_assert (target && REG_P (target));
+ return target;
+
+ case IX86_BUILTIN_BNDNARROW:
+ {
+ rtx m1, m1h1, m1h2, lb, ub, t1;
+
+ /* Return value and lb. */
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ /* Bounds. */
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ /* Size. */
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ lb = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+
+ /* Size was passed but we need to use (size - 1) as for bndmk. */
+ op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
+ op2, 1, OPTAB_DIRECT);
+
+ /* Add LB to size and invert to get UB. */
+ op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
+ op2, 1, OPTAB_DIRECT);
+ ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
+
+ if (!register_operand (lb, Pmode))
+ lb = ix86_zero_extend_to_Pmode (lb);
+ if (!register_operand (ub, Pmode))
+ ub = ix86_zero_extend_to_Pmode (ub);
+
+ /* We need to move bounds to memory before any computations. */
+ if (MEM_P (op1))
+ m1 = op1;
+ else
+ {
+ m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (m1, op1);
+ }
+
+ /* Generate mem expression to be used for access to LB and UB. */
+ m1h1 = adjust_address (m1, Pmode, 0);
+ m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
+
+ t1 = gen_reg_rtx (Pmode);
+
+ /* Compute LB. */
+ emit_move_insn (t1, m1h1);
+ ix86_emit_move_max (t1, lb);
+ emit_move_insn (m1h1, t1);
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ emit_move_insn (t1, m1h2);
+ ix86_emit_move_max (t1, ub);
+ emit_move_insn (m1h2, t1);
+
+ op2 = gen_reg_rtx (BNDmode);
+ emit_move_insn (op2, m1);
+
+ return chkp_join_splitted_slot (lb, op2);
+ }
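
A worked example of the narrowing above (made-up numbers): for incoming bounds [0x1000, 0x1fff] narrowed at lb = 0x1100 with size = 0x100:

/* op2 = size - 1                  = 0xff
   op2 = op2 + lb                  = 0x11ff
   ub  = ~op2                      = ~0x11ff      (stored form)
   new LB = max (0x1000, 0x1100)   = 0x1100
   new UB = max (~0x1fff, ~0x11ff) = ~0x11ff, i.e. 0x11ff
   Result: [0x1100, 0x11ff] -- narrowing can only shrink bounds.  */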
+
+ case IX86_BUILTIN_BNDINT:
+ {
+ rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
+
+ if (!target
+ || GET_MODE (target) != BNDmode
+ || !register_operand (target, BNDmode))
+ target = gen_reg_rtx (BNDmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ res = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ rh1 = adjust_address (res, Pmode, 0);
+ rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
+
+ /* Put the first bounds into temporaries. */
+ lb1 = gen_reg_rtx (Pmode);
+ ub1 = gen_reg_rtx (Pmode);
+ if (MEM_P (op0))
+ {
+ emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
+ emit_move_insn (ub1, adjust_address (op0, Pmode,
+ GET_MODE_SIZE (Pmode)));
+ }
+ else
+ {
+ emit_move_insn (res, op0);
+ emit_move_insn (lb1, rh1);
+ emit_move_insn (ub1, rh2);
+ }
+
+ /* Put the second bounds into temporaries. */
+ lb2 = gen_reg_rtx (Pmode);
+ ub2 = gen_reg_rtx (Pmode);
+ if (MEM_P (op1))
+ {
+ emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
+ emit_move_insn (ub2, adjust_address (op1, Pmode,
+ GET_MODE_SIZE (Pmode)));
+ }
+ else
+ {
+ emit_move_insn (res, op1);
+ emit_move_insn (lb2, rh1);
+ emit_move_insn (ub2, rh2);
+ }
+
+ /* Compute LB. */
+ ix86_emit_move_max (lb1, lb2);
+ emit_move_insn (rh1, lb1);
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ ix86_emit_move_max (ub1, ub2);
+ emit_move_insn (rh2, ub1);
+
+ emit_move_insn (target, res);
+
+ return target;
+ }
+
+ case IX86_BUILTIN_SIZEOF:
+ {
+ tree name;
+ rtx symbol;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == VAR_DECL);
+
+ name = DECL_ASSEMBLER_NAME (arg0);
+ symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
+
+ emit_insn (Pmode == SImode
+ ? gen_move_size_reloc_si (target, symbol)
+ : gen_move_size_reloc_di (target, symbol));
+
+ return target;
+ }
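
The @SIZE relocation lets the size of an object with incomplete type be resolved at link time. A hedged usage sketch (the user-level contract is an assumption here; the VAR_DECL assert above only shows the argument must be a variable):

extern int table[];  /* size unknown in this translation unit */

unsigned long
table_size_in_bytes (void)
{
  /* Expands to a move of table@SIZE, filled in by the linker.  */
  return __builtin_ia32_sizeof (table);
}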
+
+ case IX86_BUILTIN_BNDLOWER:
+ {
+ rtx mem, hmem;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (MEM_P (op0))
+ mem = op0;
+ else
+ {
+ mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (mem, op0);
+ }
+
+ /* Generate mem expression to access LB and load it. */
+ hmem = adjust_address (mem, Pmode, 0);
+ emit_move_insn (target, hmem);
+
+ return target;
+ }
+
+ case IX86_BUILTIN_BNDUPPER:
+ {
+ rtx mem, hmem, res;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (MEM_P (op0))
+ mem = op0;
+ else
+ {
+ mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (mem, op0);
+ }
+
+ /* Generate mem expression to access UB. */
+ hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
+
+ /* We need to invert all bits of UB. */
+ res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
+
+ if (res != target)
+ emit_move_insn (target, res);
+
+ return target;
+ }
+
case IX86_BUILTIN_MASKMOVQ:
case IX86_BUILTIN_MASKMOVDQU:
icode = (fcode == IX86_BUILTIN_MASKMOVQ
Ilya Enkovich
2014-10-09 14:07:59 UTC
Permalink
It appears I changed the semantics of the BNDMK expansion when I replaced tree operations with RTL ones.

Original code:

+ op1 = expand_normal (fold_build2 (PLUS_EXPR, TREE_TYPE (arg1),
+ arg1, integer_minus_one_node));
+ op1 = force_reg (Pmode, op1);

Modified code:

+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op1, Pmode))
+ op1 = ix86_zero_extend_to_Pmode (op1);
+
+ /* Builtin arg1 is size of block but instruction op1 should
+ be (size - 1). */
+ op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
+ op1, 1, OPTAB_DIRECT);

The problem is that in the modified version we may modify the value of the pseudo register into which arg1 is expanded, which leaves an incorrect value for all following uses of arg1.
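
In C-like terms (an illustrative sketch, not the actual RTL):

/* r_arg1 stands for the pseudo holding the expanded arg1.  */
unsigned long r_arg1 = size;     /* expand_normal (arg1) */
r_arg1 = r_arg1 - 1;             /* target == op1 clobbers the pseudo */
/* Every later expansion of arg1 now sees size - 1.  */

/* With a NULL target, expand_simple_binop allocates a fresh pseudo: */
unsigned long tmp = r_arg1 - 1;  /* r_arg1 still holds size */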

I didn't catch it earlier because programs surprisingly rarely hit this bug. The following change fixes it:

op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
- op1, 1, OPTAB_DIRECT);
+ NULL, 1, OPTAB_DIRECT);

A similar problem was also fixed for BNDNARROW. Does this look OK?

Thanks,
Ilya
--
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 9161287..5421ba9 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -47,6 +47,7 @@ DEF_PRIMITIVE_TYPE (UCHAR, unsigned_char_type_node)
DEF_PRIMITIVE_TYPE (QI, char_type_node)
DEF_PRIMITIVE_TYPE (HI, intHI_type_node)
DEF_PRIMITIVE_TYPE (SI, intSI_type_node)
+DEF_PRIMITIVE_TYPE (BND, pointer_bounds_type_node)
# ??? Logically this should be intDI_type_node, but that maps to "long"
# with 64-bit, and that's not how the emmintrin.h is written. Again,
# changing this would change name mangling.
@@ -60,6 +61,7 @@ DEF_PRIMITIVE_TYPE (USHORT, short_unsigned_type_node)
DEF_PRIMITIVE_TYPE (INT, integer_type_node)
DEF_PRIMITIVE_TYPE (UINT, unsigned_type_node)
DEF_PRIMITIVE_TYPE (UNSIGNED, unsigned_type_node)
+DEF_PRIMITIVE_TYPE (ULONG, long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (LONGLONG, long_long_integer_type_node)
DEF_PRIMITIVE_TYPE (ULONGLONG, long_long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (UINT8, unsigned_char_type_node)
@@ -810,3 +812,15 @@ DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI, TF)
DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF_V4SF, TF)
DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SI_V4SI, TF)
DEF_FUNCTION_TYPE_ALIAS (V8HI_FTYPE_V8HI_V8HI, TF)
+
+# MPX builtins
+DEF_FUNCTION_TYPE (BND, PCVOID, ULONG)
+DEF_FUNCTION_TYPE (VOID, PCVOID, BND)
+DEF_FUNCTION_TYPE (VOID, PCVOID, BND, PCVOID)
+DEF_FUNCTION_TYPE (BND, PCVOID, PCVOID)
+DEF_FUNCTION_TYPE (BND, PCVOID)
+DEF_FUNCTION_TYPE (BND, BND, BND)
+DEF_FUNCTION_TYPE (PVOID, PVOID, PVOID, ULONG)
+DEF_FUNCTION_TYPE (PVOID, PCVOID, BND, ULONG)
+DEF_FUNCTION_TYPE (ULONG, VOID)
+DEF_FUNCTION_TYPE (PVOID, BND)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 2b39ff1..3f464de 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -84,6 +84,8 @@ along with GCC; see the file COPYING3. If not see
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
+#include "tree-chkp.h"
+#include "rtl-chkp.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
@@ -28776,6 +28778,19 @@ enum ix86_builtins
IX86_BUILTIN_XABORT,
IX86_BUILTIN_XTEST,

+ /* MPX */
+ IX86_BUILTIN_BNDMK,
+ IX86_BUILTIN_BNDSTX,
+ IX86_BUILTIN_BNDLDX,
+ IX86_BUILTIN_BNDCL,
+ IX86_BUILTIN_BNDCU,
+ IX86_BUILTIN_BNDRET,
+ IX86_BUILTIN_BNDNARROW,
+ IX86_BUILTIN_BNDINT,
+ IX86_BUILTIN_SIZEOF,
+ IX86_BUILTIN_BNDLOWER,
+ IX86_BUILTIN_BNDUPPER,
+
/* BMI instructions. */
IX86_BUILTIN_BEXTR32,
IX86_BUILTIN_BEXTR64,
@@ -28853,6 +28868,8 @@ struct builtin_isa {
enum ix86_builtin_func_type tcode; /* type to use in the declaration */
HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
bool const_p; /* true if the declaration is constant */
+ bool leaf_p; /* true if the declaration has leaf attribute */
+ bool nothrow_p; /* true if the declaration has nothrow attribute */
bool set_and_not_built_p;
};

@@ -28904,6 +28921,8 @@ def_builtin (HOST_WIDE_INT mask, const char *name,
ix86_builtins[(int) code] = NULL_TREE;
ix86_builtins_isa[(int) code].tcode = tcode;
ix86_builtins_isa[(int) code].name = name;
+ ix86_builtins_isa[(int) code].leaf_p = false;
+ ix86_builtins_isa[(int) code].nothrow_p = false;
ix86_builtins_isa[(int) code].const_p = false;
ix86_builtins_isa[(int) code].set_and_not_built_p = true;
}
@@ -28954,6 +28973,11 @@ ix86_add_new_builtins (HOST_WIDE_INT isa)
ix86_builtins[i] = decl;
if (ix86_builtins_isa[i].const_p)
TREE_READONLY (decl) = 1;
+ if (ix86_builtins_isa[i].leaf_p)
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ if (ix86_builtins_isa[i].nothrow_p)
+ TREE_NOTHROW (decl) = 1;
}
}
}
@@ -30413,6 +30437,27 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
};

+/* Builtins for MPX. */
+static const struct builtin_description bdesc_mpx[] =
+{
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+};
+
+/* Const builtins for MPX. */
+static const struct builtin_description bdesc_mpx_const[] =
+{
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
+};
+
/* FMA4 and XOP. */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
@@ -31269,6 +31314,67 @@ ix86_init_mmx_sse_builtins (void)
}
}

+static void
+ix86_init_mpx_builtins ()
+{
+ const struct builtin_description * d;
+ enum ix86_builtin_func_type ftype;
+ tree decl;
+ size_t i;
+
+ for (i = 0, d = bdesc_mpx;
+ i < ARRAY_SIZE (bdesc_mpx);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ decl = def_builtin (d->mask, d->name, ftype, d->code);
+
+ /* Without the leaf and nothrow flags, abnormal edges may
+ follow calls to MPX builtins when setjmp is present in
+ the function. Since we may have a lot of MPX builtin
+ calls, this causes lots of useless edges and enormous
+ PHI nodes. To avoid this we mark MPX builtins as leaf
+ and nothrow. */
+ if (decl)
+ {
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+ else
+ {
+ ix86_builtins_isa[(int)d->code].leaf_p = true;
+ ix86_builtins_isa[(int)d->code].nothrow_p = true;
+ }
+ }
+
+ for (i = 0, d = bdesc_mpx_const;
+ i < ARRAY_SIZE (bdesc_mpx_const);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ decl = def_builtin_const (d->mask, d->name, ftype, d->code);
+
+ if (decl)
+ {
+ DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+ NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+ else
+ {
+ ix86_builtins_isa[(int)d->code].leaf_p = true;
+ ix86_builtins_isa[(int)d->code].nothrow_p = true;
+ }
+ }
+}
+
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
to return a pointer to VERSION_DECL if the outcome of the expression
formed by PREDICATE_CHAIN is true. This function will be called during
@@ -32807,6 +32913,7 @@ ix86_init_builtins (void)

ix86_init_tm_builtins ();
ix86_init_mmx_sse_builtins ();
+ ix86_init_mpx_builtins ();

if (TARGET_LP64)
ix86_init_builtins_va_builtins_abi ();
@@ -35120,6 +35227,37 @@ ix86_expand_vec_set_builtin (tree exp)
return target;
}

+/* Emit conditional move of SRC to DST with condition
+ OP1 CODE OP2. */
+static void
+ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
+{
+ rtx t;
+
+ if (TARGET_CMOVE)
+ {
+ t = ix86_expand_compare (code, op1, op2);
+ emit_insn (gen_rtx_SET (VOIDmode, dst,
+ gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
+ src, dst)));
+ }
+ else
+ {
+ rtx nomove = gen_label_rtx ();
+ emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
+ const0_rtx, GET_MODE (op1), 1, nomove);
+ emit_move_insn (dst, src);
+ emit_label (nomove);
+ }
+}
+
+/* Choose the max of DST and SRC and put it into DST. */
+static void
+ix86_emit_move_max (rtx dst, rtx src)
+{
+ ix86_emit_cmove (dst, src, LTU, dst, src);
+}
+
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
(and in mode MODE if that's convenient).
@@ -35185,6 +35323,343 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,

switch (fcode)
{
+ case IX86_BUILTIN_BNDMK:
+ if (!target
+ || GET_MODE (target) != BNDmode
+ || !register_operand (target, BNDmode))
+ target = gen_reg_rtx (BNDmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, Pmode))
+ op1 = ix86_zero_extend_to_Pmode (op1);
+
+ /* Builtin arg1 is size of block but instruction op1 should
+ be (size - 1). */
+ op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
+ NULL, 1, OPTAB_DIRECT);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_mk (target, op0, op1)
+ : gen_bnd32_mk (target, op0, op1));
+ return target;
+
+ case IX86_BUILTIN_BNDSTX:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, BNDmode))
+ op1 = copy_to_mode_reg (BNDmode, op1);
+ if (!register_operand (op2, Pmode))
+ op2 = ix86_zero_extend_to_Pmode (op2);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_stx (op2, op0, op1)
+ : gen_bnd32_stx (op2, op0, op1));
+ return 0;
+
+ case IX86_BUILTIN_BNDLDX:
+ if (!target
+ || GET_MODE (target) != BNDmode
+ || !register_operand (target, BNDmode))
+ target = gen_reg_rtx (BNDmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, Pmode))
+ op1 = ix86_zero_extend_to_Pmode (op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_ldx (target, op0, op1)
+ : gen_bnd32_ldx (target, op0, op1));
+ return target;
+
+ case IX86_BUILTIN_BNDCL:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, BNDmode))
+ op1 = copy_to_mode_reg (BNDmode, op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_cl (op1, op0)
+ : gen_bnd32_cl (op1, op0));
+ return 0;
+
+ case IX86_BUILTIN_BNDCU:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op0, Pmode))
+ op0 = ix86_zero_extend_to_Pmode (op0);
+ if (!register_operand (op1, BNDmode))
+ op1 = copy_to_mode_reg (BNDmode, op1);
+
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_cu (op1, op0)
+ : gen_bnd32_cu (op1, op0));
+ return 0;
+
+ case IX86_BUILTIN_BNDRET:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == SSA_NAME);
+ target = chkp_get_rtl_bounds (arg0);
+
+ /* If no bounds were specified for the returned value,
+ then use INIT bounds. This usually happens when
+ some built-in function is expanded. */
+ if (!target)
+ {
+ rtx t1 = gen_reg_rtx (Pmode);
+ rtx t2 = gen_reg_rtx (Pmode);
+ target = gen_reg_rtx (BNDmode);
+ emit_move_insn (t1, const0_rtx);
+ emit_move_insn (t2, constm1_rtx);
+ emit_insn (BNDmode == BND64mode
+ ? gen_bnd64_mk (target, t1, t2)
+ : gen_bnd32_mk (target, t1, t2));
+ }
+
+ gcc_assert (target && REG_P (target));
+ return target;
+
+ case IX86_BUILTIN_BNDNARROW:
+ {
+ rtx m1, m1h1, m1h2, lb, ub, t1;
+
+ /* Return value and lb. */
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ /* Bounds. */
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ /* Size. */
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ lb = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+
+ /* Size was passed but we need to use (size - 1) as for bndmk. */
+ op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
+ NULL, 1, OPTAB_DIRECT);
+
+ /* Add LB to size and invert to get UB. */
+ op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
+ op2, 1, OPTAB_DIRECT);
+ ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
+
+ if (!register_operand (lb, Pmode))
+ lb = ix86_zero_extend_to_Pmode (lb);
+ if (!register_operand (ub, Pmode))
+ ub = ix86_zero_extend_to_Pmode (ub);
+
+ /* We need to move bounds to memory before any computations. */
+ if (MEM_P (op1))
+ m1 = op1;
+ else
+ {
+ m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (m1, op1);
+ }
+
+ /* Generate mem expression to be used for access to LB and UB. */
+ m1h1 = adjust_address (m1, Pmode, 0);
+ m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
+
+ t1 = gen_reg_rtx (Pmode);
+
+ /* Compute LB. */
+ emit_move_insn (t1, m1h1);
+ ix86_emit_move_max (t1, lb);
+ emit_move_insn (m1h1, t1);
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ emit_move_insn (t1, m1h2);
+ ix86_emit_move_max (t1, ub);
+ emit_move_insn (m1h2, t1);
+
+ op2 = gen_reg_rtx (BNDmode);
+ emit_move_insn (op2, m1);
+
+ return chkp_join_splitted_slot (lb, op2);
+ }
+
+ case IX86_BUILTIN_BNDINT:
+ {
+ rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
+
+ if (!target
+ || GET_MODE (target) != BNDmode
+ || !register_operand (target, BNDmode))
+ target = gen_reg_rtx (BNDmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ res = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ rh1 = adjust_address (res, Pmode, 0);
+ rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
+
+ /* Put the first bounds into temporaries. */
+ lb1 = gen_reg_rtx (Pmode);
+ ub1 = gen_reg_rtx (Pmode);
+ if (MEM_P (op0))
+ {
+ emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
+ emit_move_insn (ub1, adjust_address (op0, Pmode,
+ GET_MODE_SIZE (Pmode)));
+ }
+ else
+ {
+ emit_move_insn (res, op0);
+ emit_move_insn (lb1, rh1);
+ emit_move_insn (ub1, rh2);
+ }
+
+ /* Put the second bounds into temporaries. */
+ lb2 = gen_reg_rtx (Pmode);
+ ub2 = gen_reg_rtx (Pmode);
+ if (MEM_P (op1))
+ {
+ emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
+ emit_move_insn (ub2, adjust_address (op1, Pmode,
+ GET_MODE_SIZE (Pmode)));
+ }
+ else
+ {
+ emit_move_insn (res, op1);
+ emit_move_insn (lb2, rh1);
+ emit_move_insn (ub2, rh2);
+ }
+
+ /* Compute LB. */
+ ix86_emit_move_max (lb1, lb2);
+ emit_move_insn (rh1, lb1);
+
+ /* Compute UB. UB is stored in 1's complement form. Therefore
+ we also use max here. */
+ ix86_emit_move_max (ub1, ub2);
+ emit_move_insn (rh2, ub1);
+
+ emit_move_insn (target, res);
+
+ return target;
+ }
+
+ case IX86_BUILTIN_SIZEOF:
+ {
+ tree name;
+ rtx symbol;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (arg0) == VAR_DECL);
+
+ name = DECL_ASSEMBLER_NAME (arg0);
+ symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
+
+ emit_insn (Pmode == SImode
+ ? gen_move_size_reloc_si (target, symbol)
+ : gen_move_size_reloc_di (target, symbol));
+
+ return target;
+ }
+
+ case IX86_BUILTIN_BNDLOWER:
+ {
+ rtx mem, hmem;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (MEM_P (op0))
+ mem = op0;
+ else
+ {
+ mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (mem, op0);
+ }
+
+ /* Generate mem expression to access LB and load it. */
+ hmem = adjust_address (mem, Pmode, 0);
+ emit_move_insn (target, hmem);
+
+ return target;
+ }
+
+ case IX86_BUILTIN_BNDUPPER:
+ {
+ rtx mem, hmem, res;
+
+ if (!target
+ || GET_MODE (target) != Pmode
+ || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ /* We need to move bounds to memory first. */
+ if (MEM_P (op0))
+ mem = op0;
+ else
+ {
+ mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+ emit_move_insn (mem, op0);
+ }
+
+ /* Generate mem expression to access UB. */
+ hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
+
+ /* We need to invert all bits of UB. */
+ res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
+
+ if (res != target)
+ emit_move_insn (target, res);
+
+ return target;
+ }
+
case IX86_BUILTIN_MASKMOVQ:
case IX86_BUILTIN_MASKMOVDQU:
icode = (fcode == IX86_BUILTIN_MASKMOVQ
Uros Bizjak
2014-10-09 16:54:07 UTC
Permalink
Post by Ilya Enkovich
It appears I changed the semantics of the BNDMK expansion when I replaced tree operations with RTL ones.
+ op1 = expand_normal (fold_build2 (PLUS_EXPR, TREE_TYPE (arg1),
+ arg1, integer_minus_one_node));
+ op1 = force_reg (Pmode, op1);
+ op1 = expand_normal (arg1);
+
+ if (!register_operand (op1, Pmode))
+ op1 = ix86_zero_extend_to_Pmode (op1);
+
+ /* Builtin arg1 is size of block but instruction op1 should
+ be (size - 1). */
+ op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
+ op1, 1, OPTAB_DIRECT);
The problem is that in the modified version we may modify the value of the pseudo register into which arg1 is expanded, which leaves an incorrect value for all following uses of arg1.
op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
- op1, 1, OPTAB_DIRECT);
+ NULL, 1, OPTAB_DIRECT);
A similar problem was also fixed for BNDNARROW. Does this look OK?
I'm not aware of this type of limitation, and there are quite a few
similar constructs in i386.c. It is hard to say what happens without a
testcase; perhaps one of the RTX experts (CC'd) can advise what is
recommended here.

Uros.
