Discussion:
RFA: improvement to if-conversion
(too old to reply)
Joern Rennecke
2004-01-29 20:07:15 UTC
Permalink
At -O2 -fomot-frame-pointer, we currently generate quite inefficient code
for sh64-elf for this testcase:

long long
f (long long a, long long b, long long c)
{
long long d;

if (a < b)
{
d = a < c ? a : c;
d++;
}
else
{
d = b < c ? b : c;
d++;
}
return d;
}

_f:
pt .L2, tr0
cmpgt r4, r2, r1
add r2, r63, r6
cmveq r1, r4, r6
ptabs r18, tr1
addi r6, 1, r7
bge r2, r3, tr0
pt .L4, tr0
blink tr0, r63
.align 5
.L2:
cmpgt r4, r3, r1
cmveq r1, r4, r3
addi r3, 1, r7
.L4:
add r7, r63, r2
blink tr1, r63

We can get much better code when we recognize that the if and else blocks
are structurally equivalent, and use a conditional move to get the
right input value:

_f:
cmpgt r3, r2, r1
ptabs r18, tr0
cmveq r1, r3, r2
cmpgt r4, r2, r1
cmveq r1, r4, r2
addi r2, 1, r2
blink tr0, r63

The appended patch implements this optimization. Bootstrapped on
i686-pc-linux-gnu. Regression tested for i686-pc-linux-gnu native and
X sh64-elf.

2004-01-29 J"orn Rennecke <***@superh.com>

Generate 4-insn conditional move sequence for 3-way minimum:
* ifcvt.c (will_merge_if_block, update_regno_block): New functions.
(noce_try_complex_cmove, ifconvert_mark_reg_use): Likewise.
(ifconvert_update_reg_use, struct_equiv): Likewise.
(regno_block, regno_block_max): New variables.
(noce_process_if_block): Call noce_try_complex_cmove.
(cond_exec_process_if_block): Call will_merge_if_block and
will_merge_if_block.
(update_regno_block_info, equiv_info): New structs.
(STRUCT_EQUIV_MAX_LOCAL, STRUCT_EQUIV_MAX_INPUT): New defines.
(if_convert): For flag_expensive_optimizations, initialize regno_block
before the conversion loop, and free regno_block at the end.

Index: ifcvt.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/ifcvt.c,v
retrieving revision 1.137
diff -p -u -r1.137 ifcvt.c
--- ifcvt.c 25 Jan 2004 03:52:42 -0000 1.137
+++ ifcvt.c 29 Jan 2004 17:37:25 -0000
@@ -98,6 +98,7 @@ static int noce_operand_ok (rtx);
static int noce_process_if_block (ce_if_block_t *);
static int process_if_block (ce_if_block_t *);
static void merge_if_block (ce_if_block_t *);
+static void will_merge_if_block (ce_if_block_t *);
static int find_cond_trap (basic_block, edge, edge);
static basic_block find_if_header (basic_block, int);
static int block_jumps_and_fallthru_p (basic_block, basic_block);
@@ -571,6 +572,7 @@ cond_exec_process_if_block (ce_if_block_
n_insns, (n_insns == 1) ? " was" : "s were");

/* Merge the blocks! */
+ will_merge_if_block (ce_info);
merge_if_block (ce_info);
cond_exec_changed_p = TRUE;
return TRUE;
@@ -613,6 +615,20 @@ static rtx noce_get_alt_condition (struc
static int noce_try_minmax (struct noce_if_info *);
static int noce_try_abs (struct noce_if_info *);

+static void update_regno_block (basic_block, basic_block);
+static int noce_try_complex_cmove (ce_if_block_t *,
+ struct noce_if_info *if_info,
+ rtx jump, rtx cond);
+
+/* Allocated and initialized at the start of if_convert, REGNO_BLOCK is
+ an array indexed by register number, with REGNO_BLOCK_MAX being the
+ upper (excluded) bound. For each register thus covered by the array,
+ first is the first basic block - in linear instruction order - in
+ which a register is mentioned, and last is the last basic block in
+ which it is mentioned. */
+static struct { basic_block first, last; } *regno_block;
+static int regno_block_max;
+
/* Helper function for noce_try_store_flag*. */

static rtx
@@ -1846,7 +1862,9 @@ noce_process_if_block (struct ce_if_bloc
if (! insn_a
|| insn_a != last_active_insn (then_bb, FALSE)
|| (set_a = single_set (insn_a)) == NULL_RTX)
- return FALSE;
+ return ((else_bb && regno_block && HAVE_conditional_move)
+ ? noce_try_complex_cmove (ce_info, &if_info, jump, cond)
+ : FALSE);

x = SET_DEST (set_a);
a = SET_SRC (set_a);
@@ -2009,6 +2027,7 @@ noce_process_if_block (struct ce_if_bloc
return FALSE;

success:
+ will_merge_if_block (ce_info);
/* The original sets may now be killed. */
delete_insn (insn_a);

@@ -2076,6 +2095,24 @@ process_if_block (struct ce_if_block * c
return FALSE;
}

+
+/* The blocks described in CE_INFO will be merged. Update REGNO_BLOCK
+ accordingly. */
+static void
+will_merge_if_block (struct ce_if_block * ce_info)
+{
+ basic_block test_bb = ce_info->test_bb; /* test block */
+
+ if (ce_info->then_bb && regno_block)
+ update_regno_block (ce_info->then_bb, test_bb);
+ if (ce_info->else_bb && regno_block)
+ update_regno_block (ce_info->else_bb, test_bb);
+ if (ce_info->join_bb && regno_block)
+ update_regno_block (ce_info->join_bb, test_bb);
+ if (flag_expensive_optimizations)
+ cond_exec_changed_p = TRUE;
+}
+
/* Merge the blocks and mark for local life update. */

static void
@@ -3193,6 +3230,293 @@ dead_or_predicable (basic_block test_bb,
cancel_changes (0);
return FALSE;
}
+
+/* Called via for_each_rtx by if_convert, in a linear forward scan over all
+ instructions, if *px is a REG, this updates the corresponding entry in
+ REGNO_BLOCK accordingly. PBLOCK is the current basic block. */
+static int
+ifconvert_mark_reg_use (rtx *px, void *pblock)
+{
+ rtx x = *px;
+ int regno;
+
+ if (!x || GET_CODE (x) != REG)
+ return 0;
+ regno = REGNO (x);
+ if (!regno_block[regno].first)
+ regno_block[regno].first = pblock;
+ regno_block[regno].last = pblock;
+ return 0;
+}
+
+/* Used to describe that FROM is going to be merged into TO. */
+struct update_regno_block_info { basic_block from, to; };
+
+/* Called via for_each_rtx by update_regno_block, if *px is a REG,
+ update its REGNO_BLOCK entry according to the information in PINFO,
+ which is a struct update_regno_block_info *. */
+static int
+ifconvert_update_reg_use (rtx *px, void *pinfo)
+{
+ rtx x = *px;
+ struct update_regno_block_info *pi = pinfo;
+
+ int regno;
+
+ if (!x || GET_CODE (x) != REG)
+ return 0;
+ regno = REGNO (x);
+ if (regno >= regno_block_max)
+ return 0;
+ if (regno_block[regno].first == pi->from)
+ regno_block[regno].first = pi->to;
+ if (regno_block[regno].last == pi->from)
+ regno_block[regno].last = pi->to;
+ return 0;
+}
+
+/* FROM_BB is going to be merged into TO_BB. Update REGNO_BLOCK
+ accordingly. */
+static void
+update_regno_block (basic_block from_bb, basic_block to_bb)
+{
+ rtx insn;
+ struct update_regno_block_info info;
+
+ info.from = from_bb;
+ info.to = to_bb;
+ for (insn = BB_HEAD (from_bb); ; insn = NEXT_INSN (insn))
+ {
+ if (INSN_P (insn))
+ {
+ for_each_rtx (&PATTERN (insn), &ifconvert_update_reg_use, &info);
+ for_each_rtx (&REG_NOTES (insn), &ifconvert_update_reg_use, &info);
+ }
+ if (insn == BB_END (from_bb))
+ break;
+ }
+}
+
+/* Constants used to size arrays in struct equiv_info. When these limits
+ are exceeded, struct_equiv returns zero. */
+#define STRUCT_EQUIV_MAX_LOCAL 16
+/* The maximum number of references to an input register that struct_equiv
+ can handle. */
+#define STRUCT_EQUIV_MAX_INPUT 9
+
+/* A struct equiv_info is used to pass information to struct_equiv and
+ to gather state while two basic blocks are checked for structural
+ equivalence.
+ X_BLOCK and Y_BLOCK are the two block being compared.
+ X_INPUT and Y_INPUT are used by struct_equiv to record a register that
+ is used as an input parameter, i.e. where different registers are used
+ as sources.
+ INPUT gathers pointers to all places where the input register is mentioned
+ in X_BLOCK, with INPUT_COUNT being the index for the next free entry.
+ X_LOCAL and Y_LOCAL are used to gather register numbers of register pairs
+ that are local to X_BLOCK and Y_BLOCK, with LOCAL_COUNT being the index
+ to the next free entry. */
+struct equiv_info
+{
+ basic_block x_block, y_block;
+ rtx x_input, y_input;
+ int input_count, local_count;
+ rtx *input[STRUCT_EQUIV_MAX_INPUT];
+ int x_local[STRUCT_EQUIV_MAX_LOCAL], y_local[STRUCT_EQUIV_MAX_LOCAL];
+};
+
+/* Check if *xp is equivalent to Y. RVALUE indicates if the processed
+ piece of rtl is used as a destination, in which case we can't have
+ different registers being an input. Returns nonzero if the two blocks
+ have been identified as equivalent, zero otherwise. */
+static int
+struct_equiv (rtx *xp, rtx y, int rvalue, struct equiv_info *info)
+{
+ rtx x = *xp;
+ enum rtx_code code;
+ int length;
+ const char *format;
+ int i;
+
+ if (!y || !x)
+ return x == y;
+ code = GET_CODE (y);
+ if (GET_CODE (x) != code
+ || GET_MODE (x) != GET_MODE (y))
+ return 0;
+ /* ??? could extend to allow CONST_INT inputs. */
+ if (code == REG)
+ {
+ int x_regno = REGNO (x);
+ int y_regno = REGNO (y);
+ int x_local, y_local;
+
+ if (x_regno == y_regno)
+ return 1;
+ x_local = (x_regno >= regno_block_max
+ || (regno_block[x_regno].first == info->x_block
+ && regno_block[x_regno].last == info->x_block));
+ y_local = (y_regno >= regno_block_max
+ || (regno_block[y_regno].first == info->y_block
+ && regno_block[y_regno].last == info->y_block));
+ if (x_local && y_local)
+ {
+ for (i = info->local_count - 1; i >= 0; i--)
+ {
+ if (x_regno == info->x_local[i] && y_regno == info->y_local[i])
+ return 1;
+ if (x_regno == info->x_local[i] || y_regno == info->y_local[i])
+ return 0;
+ }
+ if (info->local_count >= STRUCT_EQUIV_MAX_LOCAL)
+ return 0;
+ info->x_local[info->local_count] = x_regno;
+ info->y_local[info->local_count] = y_regno;
+ info->local_count++;
+ return 1;
+ }
+ if (x_local || y_local || !rvalue
+ || info->input_count >= STRUCT_EQUIV_MAX_INPUT)
+ return 0;
+ if (info->x_input == NULL_RTX && info->y_input == NULL_RTX)
+ {
+ info->x_input = x;
+ info->y_input = y;
+ }
+ else if (! rtx_equal_p (x, info->x_input)
+ || ! rtx_equal_p (y, info->y_input))
+ return 0;
+ info->input[info->input_count++] = xp;
+ return 1;
+ }
+ else if (code == SET)
+ return (struct_equiv (&SET_SRC (x), SET_SRC (y), 1, info)
+ && struct_equiv (&SET_DEST (x), SET_DEST (y), 0, info));
+ else if (code == MEM)
+ return struct_equiv (&XEXP (x, 0), XEXP (y, 0), 1, info);
+
+ /* Process subexpressions - this is similar to rtx_equal_p. */
+ length = GET_RTX_LENGTH (code);
+ format = GET_RTX_FORMAT (code);
+
+ for (i = 0; i < length; ++i)
+ {
+ switch (format[i])
+ {
+ case 'w':
+ if (XWINT (x, i) != XWINT (y, i))
+ return 0;
+ break;
+ case 'n':
+ case 'i':
+ if (XINT (x, i) != XINT (y, i))
+ return 0;
+ break;
+ case 'V':
+ case 'E':
+ if (XVECLEN (x, i) != XVECLEN (y, i))
+ return 0;
+ if (XVEC (x, i) != 0)
+ {
+ int j;
+ for (j = 0; j < XVECLEN (x, i); ++j)
+ {
+ if (! struct_equiv (&XVECEXP (x, i, j), XVECEXP (y, i, j),
+ rvalue, info))
+ return 0;
+ }
+ }
+ break;
+ case 'e':
+ if (! struct_equiv (&XEXP (x, i), XEXP (y, i), rvalue, info))
+ return 0;
+ break;
+ case 'S':
+ case 's':
+ if ((XSTR (x, i) || XSTR (y, i))
+ && (! XSTR (x, i) || ! XSTR (y, i)
+ || strcmp (XSTR (x, i), XSTR (y, i))))
+ return 0;
+ break;
+ case 'u':
+ /* These are just backpointers, so they don't matter. */
+ break;
+ case '0':
+ case 't':
+ break;
+ default:
+ /* Err on the side of caution. */
+ return 0;
+ }
+ }
+ return 1;
+}
+
+/* Try to use a cmove where if end else blocks are structurally equivalent
+ blocks that differ only by an input register, and possible some local
+ registers. */
+static int
+noce_try_complex_cmove (struct ce_if_block * ce_info,
+ struct noce_if_info *if_info, rtx jump, rtx cond)
+{
+ basic_block then_bb = ce_info->then_bb; /* THEN */
+ basic_block else_bb = ce_info->else_bb; /* ELSE or NULL */
+ rtx xi, yi, x_end, y_end;
+ struct equiv_info info;
+ int i;
+ rtx temp;
+ rtx next;
+
+ if_info->cond = cond;
+ if_info->jump = jump;
+ info.x_block = else_bb;
+ info.y_block = then_bb;
+ info.x_input = info.y_input = NULL_RTX;
+ info.input_count = info.local_count = 0;
+ xi = first_active_insn (else_bb);
+ yi = first_active_insn (then_bb);
+ if (!xi || !yi)
+ return FALSE;
+ x_end = BB_END (else_bb);
+ y_end = prev_active_insn (BB_END (then_bb));
+ for (;;)
+ {
+ if (INSN_P (xi) != INSN_P (yi))
+ return FALSE;
+ if (INSN_P (xi))
+ {
+ if (! struct_equiv (&PATTERN (xi), PATTERN (yi), 1, &info)
+ || ! struct_equiv (&REG_NOTES (xi), REG_NOTES (yi), 1, &info))
+ return 0;
+ }
+ if (xi == x_end || yi == y_end)
+ break;
+ xi = NEXT_INSN (xi);
+ yi = NEXT_INSN (yi);
+ }
+ if (xi != x_end || yi != y_end || !info.input_count)
+ return FALSE;
+ temp = gen_reg_rtx (GET_MODE (info.x_input));
+ if_info->a = info.y_input;
+ if_info->b = info.x_input;
+ if_info->x = temp;
+ if (! noce_try_cmove (if_info))
+ return FALSE;
+ will_merge_if_block (ce_info);
+ for (i = info.input_count-1; i >= 0; i--)
+ *info.input[i] = temp;
+ for (yi = BB_HEAD (then_bb); BB_HEAD (then_bb) != BB_END (then_bb); yi = next)
+ {
+ next = NEXT_INSN (yi);
+ if (INSN_P (yi))
+ next = delete_insn (yi);
+ if (yi == BB_END (then_bb) || PREV_INSN (yi) == BB_END (then_bb))
+ break;
+ }
+ delete_insn (jump);
+ merge_if_block (ce_info);
+ return TRUE;
+}

/* Main entry point for all if-conversion. */

@@ -3221,6 +3545,32 @@ if_convert (int x_life_data_ok)
if (life_data_ok)
clear_bb_flags ();

+ if (flag_expensive_optimizations)
+ {
+ /* For each register, note in which basic blocks it is mentioned first
+ and last. */
+ basic_block current_block = NULL_BLOCK;
+ rtx insn;
+
+ regno_block_max = max_reg_num ();
+ regno_block = xcalloc (regno_block_max, sizeof *regno_block);
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (GET_CODE (insn) == NOTE
+ && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
+ current_block = NOTE_BASIC_BLOCK (insn);
+ else if (INSN_P (insn))
+ {
+ for_each_rtx (&PATTERN (insn), &ifconvert_mark_reg_use,
+ current_block);
+ for_each_rtx (&REG_NOTES (insn), &ifconvert_mark_reg_use,
+ current_block);
+ }
+ }
+ }
+ else
+ regno_block = 0;
+
/* Go through each of the basic blocks looking for things to convert. If we
have conditional execution, we make multiple passes to allow us to handle
IF-THEN{-ELSE} blocks within other IF-THEN{-ELSE} blocks. */
@@ -3248,6 +3598,9 @@ if_convert (int x_life_data_ok)
#endif
}
while (cond_exec_changed_p);
+
+ if (regno_block)
+ free (regno_block);

#ifdef IFCVT_MULTIPLE_DUMPS
if (rtl_dump_file)
Richard Henderson
2004-01-30 02:54:59 UTC
Permalink
On Thu, Jan 29, 2004 at 08:07:15PM +0000, Joern Rennecke wrote:
> +static int
> +struct_equiv (rtx *xp, rtx y, int rvalue, struct equiv_info *info)

It would be nice to generalize this with the existing cross-jumping
code that also tries to do structural matching.

> + x_local = (x_regno >= regno_block_max
> + || (regno_block[x_regno].first == info->x_block
> + && regno_block[x_regno].last == info->x_block));

Why wouldn't noticing that they're dead on entry and dead on exit
be just as good? Seems like that'd be easier than maintaining
your own regno_block thingy.

> + if_info->a = info.y_input;
> + if_info->b = info.x_input;
> + if_info->x = temp;
> + if (! noce_try_cmove (if_info))

I had to add

if_info->insn_a = info->jump;
if_info->insn_b = info->jump;

here in order to even compile your test case on alpha.
But it bombs pretty quickly later during bootstrap:

Program received signal SIGSEGV, Segmentation fault.
0x000000012057f2f8 in get_condition (insn=0x200006b4eb0)
at ../../../src-gcc/gcc/sched-deps.c:148
148 if (GET_CODE (pat) == COND_EXEC)
(gdb) p pat
$3 = 0xafafafafafafafaf
(gdb) p insn
$4 = 0x200006b4eb0
(gdb) pr
(barrier 1132 1126 1134)

Since this is in sched1, I assume you've messed up the basic
block boundaries.

Clearly this will need testing on more targets. If you do x86, then
you should bootstrap with -march=i686 so that cmove is available.


r~
Joern Rennecke
2004-01-30 13:20:40 UTC
Permalink
[struct_equiv]
> It would be nice to generalize this with the existing cross-jumping
> code that also tries to do structural matching.

It certainly makes sense to look at the special cases that the
cross-jumping code handles; I see I missed some of these. I'm not
very happy about the way it handles REG_EQUAL notes, though. It
should only remove non-matching notes if the optimization goes ahead.

Looking for matching outgoing edges instead of linear control flow
merge could improve if-conversion in general. When I was working on
constructing a simple testcase, I first just constructed a plain 3-way
min function, but it got only one simple if-conversion because the
straight jumps to the function return caused the if-conversion to fail
to match.

The cross-jumping code has no need to keep track of an input register.
I don't know if it would benefit from tracking of equivalent local
registers. Would the added overhead in cross-jumping be acceptable?

What would be the right home for a basic block structual comparison
function used by if-convert and cross-jumping? rtlanal.c ? cfgrtl.c?

> > + x_local = (x_regno >= regno_block_max
> > + || (regno_block[x_regno].first == info->x_block
> > + && regno_block[x_regno].last == info->x_block));
>
> Why wouldn't noticing that they're dead on entry and dead on exit
> be just as good? Seems like that'd be easier than maintaining
> your own regno_block thingy.

I think full register liveness information would be harder to keep
up-to-date as if conversion progresses. This optimization only
comes into its own as you match blocks that have been previously
joined by if-conversion.

> Clearly this will need testing on more targets. If you do x86, then
> you should bootstrap with -march=i686 so that cmove is available.

I'll try that. If I read invoke.texi correctly, I want something like
bash-2.05$ make bootstrap BOOT_CFLAGS='-march=i686 -O2 -g' > boot.out 2>&1 &
.
Kelley Cook
2004-01-30 15:58:24 UTC
Permalink
>> Clearly this will need testing on more targets. If you do x86, then
>> you should bootstrap with -march=i686 so that cmove is available.
>
> I'll try that. If I read invoke.texi correctly, I want something like
> bash-2.05$ make bootstrap BOOT_CFLAGS='-march=i686 -O2 -g' > boot.out
2>&1 &

That would work.

Alternatively, you could "configure --with-arch=i686 ..." which would
cause the stage[123] compilers to default to "-march=i686" effectively
accomplishing the same thing as setting BOOT_CFLAGS with the added bonus
that the testsuite would also be run with a compiler defaulting to
"-march=i686".

Kelley Cook
Richard Henderson
2004-01-30 18:56:15 UTC
Permalink
On Fri, Jan 30, 2004 at 01:20:40PM +0000, Joern Rennecke wrote:
> Looking for matching outgoing edges instead of linear control flow
> merge could improve if-conversion in general.

Matching edges is the if_case_N routines, as opposed to
the noce_* routines.

> The cross-jumping code has no need to keep track of an input register.
> I don't know if it would benefit from tracking of equivalent local
> registers. Would the added overhead in cross-jumping be acceptable?

*shrug* depends on how much, I guess. Of course, if you find such
a sequence that does depend on input registers, cross-jump could
certainly make use -- emit one move before the jump. It'd be a
true size improvement over what we currently have.

> What would be the right home for a basic block structual comparison
> function used by if-convert and cross-jumping? rtlanal.c ? cfgrtl.c?

Dunno. It might deserve it's own file.

> I think full register liveness information would be harder to keep
> up-to-date as if conversion progresses.

I think that global_live_at_start/end are kept up-to-date. Leastwise,
I'd be interested to know how badly they're off. We re-run life info
at the end, but that's to get death notes inserted properly.



r~
Joern Rennecke
2004-02-04 18:45:26 UTC
Permalink
> On Fri, Jan 30, 2004 at 01:20:40PM +0000, Joern Rennecke wrote:
> > Looking for matching outgoing edges instead of linear control flow
> > merge could improve if-conversion in general.

Richard Henderson:
> Matching edges is the if_case_N routines, as opposed to
> the noce_* routines.

With matching edges I mean that both the if and the else block have
the same successor, albeit the successor is not adjacent, and might
have other predecessors. I don't see that being handled by the
find_if_case_N routines .

> *shrug* depends on how much, I guess. Of course, if you find such
> a sequence that does depend on input registers, cross-jump could
> certainly make use -- emit one move before the jump. It'd be a
> true size improvement over what we currently have.

The overhead is basically a linear factor - I think it's mainly just a
bigger I-cache footprint for matches. Because cross-jumping will consider
using just the end of a block, we can't really be undecided for a long
time if the transformation is possible. Of course we could send some
time finding out that the transformation is possible for N instructions,
but because we are optimizing for speed, and we can't do the entire block,
we decide it's not useful. But I would that expect to be in the noise.

When we are optimizing for speed, we basically only want the case that
is an if conversion.

> > What would be the right home for a basic block structual comparison
> > function used by if-convert and cross-jumping? rtlanal.c ? cfgrtl.c?
>
> Dunno. It might deserve it's own file.

It doesn't seem quite that large to me. Unless you have objections,
I'll look into putting it into cfgrtl.c

> > I think full register liveness information would be harder to keep
> > up-to-date as if conversion progresses.
>
> I think that global_live_at_start/end are kept up-to-date. Leastwise,
> I'd be interested to know how badly they're off. We re-run life info
> at the end, but that's to get death notes inserted properly.

Hmm. merge_if_block calls rtl_merge_blocks, and it calls it last merging
join_bb into then_bb, which puts the life_at_end information into then_bb.




Would it be OK to re-order register notes to make them easier to compare?
I.e., if I sort them by type to get a normal form, I can compare the
head of both note lists together without having to search the other list
for a match.

I suppose it is best to make two passes over the insns, first to decide
if the optimization can be safely done (for cross-jumping also: for how many
instructions), and a measure of cost for unifying things that are not the
same according to notes (see below), and a if the optimization goes ahead,
a second pass to actually remove all the notes that need removal.
If I sort the notes in the first pass, I can rely on them being sorted
in the second pass.

I noticed that the current cross-jumping code does only care about
REG_EQUAL, REG_EQUIV, and some REG_DEAD notes, but I think the way it
ignores all the other notes is generally unsafe. I've gone through
enum reg_note to evaluate the current handling of the various note types
in the cross jumping code - or lack thereof - for safety:

Safe:
REG_DEAD, REG_UNUSED: should be recomputed afterwards. insns_match_p
checks the special case of stack regs dying.
REG_INC: If insns match, notes should match.
(Note: have to add POST_INC etc handling in struct_equiv to clear rvalue
on recursion)
REG_LABEL: If insns match, notes should match.

Dubious - existence of the note is unlikely or its presence might not
matter, but the current handling can't be considered safe without
further investigation:
REG_EQUIV, REG_EQUAL: flow_find_cross_jump removes non-matching ones.
If a libcall is around when it does this, and it matches except for the
note, it is guaranteed to be broken.
Removing notes can also be considered to have a cost in potentially
missed subsequent optimizations.
REG_RETVAL, REG_LIBCALL: If they are around, we should better make sure they
match. We could remove non-matching ones, but only if we do it for
both the REG_RETVAL and the REG_LIBCALL note of the same libcall.
REG_CC_SETTER, REG_CC_USER: If insns match, notes should match - except
after reorg, when a cc0 setting insn is allowed in a delay slot of a jump.
Accordig to backends.html, this could affect cris.
REG_BR_PROB: It would be prudent to compute the resultant probability,
rather than chucking away one set of data.
If both block-to-be-combined are similarly frequent, but the branch
probability is significantly different, we also inhibit subsequent
probability-based optimizations and static branch prediction. This can
be considered to be an additional cost of the considered transformation.

Unsafe:
REG_NONNEG, REG_NOALIAS, REG_ALWAYS_RETURN: we could remove non-matching
ones; but we must not introduce them on a path that didn't have them.
REG_FRAME_RELATED_EXPR, REG_EH_REGION, REG_SETJMP, REG_VTABLE_REF.

rtl.h / rtl.c garbage???:
REG_EH_CONTEXT
Richard Henderson
2004-02-05 05:42:08 UTC
Permalink
On Wed, Feb 04, 2004 at 06:45:26PM +0000, Joern Rennecke wrote:
> Would it be OK to re-order register notes to make them easier to compare?

Sure. I'm not sure how you'd sensibly force them into a particular
order on input; existing manipulators are too chaotic.

> REG_EQUIV, REG_EQUAL: flow_find_cross_jump removes non-matching ones.

Probably correct.

> REG_RETVAL, REG_LIBCALL: If they are around, we should better make sure they
> match. We could remove non-matching ones, but only if we do it for
> both the REG_RETVAL and the REG_LIBCALL note of the same libcall.

I would just fail if you see a libcall. With tree-ssa, I expect
libcall to go away completely; everything that they were supposed
to allow should already have been done at the tree level.

> REG_CC_SETTER, REG_CC_USER: If insns match, notes should match - except
> after reorg, when a cc0 setting insn is allowed in a delay slot of a jump.
> Accordig to backends.html, this could affect cris.

I think we can ignore the after reorg case?

> REG_BR_PROB: It would be prudent to compute the resultant probability,
> rather than chucking away one set of data.

I think these are dead while the CFG exists; the data is located
in the block/edge frequency fields. This note basically exists
only to carry this data to final, after the CFG is destroyed.

> REG_EH_CONTEXT

This must match, both in existance and value.


r~
Joern Rennecke
2004-02-05 13:38:25 UTC
Permalink
> Sure. I'm not sure how you'd sensibly force them into a particular
> order on input; existing manipulators are too chaotic.

I didn't mean to require the register notes to be sorted all the time.
I mean to sort them when we compare the insns for equality.
I think merge sort should be appropriate, considering we already have
a list structure.

As far as I can tell, we should only see one note of every kind where
we care about differences (e.g. we ignore most REG_DEAD notes), so
it should be OK to just sort by kind. Besides, merge sort is stable,
and considering the insns not equal because multiple notes of the same kind
are ordered differently should be the safe thing to do.

If STACK_REGS is defined, however, we want to sort REG_DEAD notes with
a REG operand also on their REGNO.

>
> > REG_EQUIV, REG_EQUAL: flow_find_cross_jump removes non-matching ones.
>
> Probably correct.

Hmm, I just notice: REG_EQUAL and REG_EQUIV are considered to match if
the insides match. I can't think of a scenario off the top of my head
where we'll see two such unequal but matching notes, but I suppose it
should be a safer to consider them not matching instead.

> > REG_RETVAL, REG_LIBCALL: If they are around, we should better make sure they
> > match. We could remove non-matching ones, but only if we do it for
> > both the REG_RETVAL and the REG_LIBCALL note of the same libcall.
>
> I would just fail if you see a libcall. With tree-ssa, I expect
> libcall to go away completely; everything that they were supposed
> to allow should already have been done at the tree level.

It shouldn't hurt to let through a matching libcall. I was thinking
of letting through all reg notes that match, i.e. same type and
the expressions compare equal with rtx_equal_p, and only if they
fail to match, we consider if this can be resolved by removing the
note. Any insn should have no more than one REG_EQUAL or REG_EQUIV note,
so checking in the case of a REG_EQUAL / REG_EQUIV mismatch for the
presence of a REG_RETVAL note on the same insn does not increase the
complexity, and I wouldn't expect such mismatches to happen that often on
otherwise matching insns that we would have to worry about the extra
search more than a special-case check for REG_RETVAL in the generic
comparison code.

> I think we can ignore the after reorg case?
Hmm, you are right, we shouldn't do any optimization that are that high-level
after reorg, since it comes after machine_dependent_reorg. Of course,
if we rely on this, we can't change the scheduler later to generate
sequences with cc0 setters inside.

> > REG_EH_CONTEXT
>
> This must match, both in existance and value.

I think it's dead.

***@linsvr3(8)% find . -type f | xargs grep REG_EH_CONTEXT
cat version.c
./ChangeLog.0: * rtl.h, rtl.c: New reg_note REG_EH_CONTEXT.
./rtl.c: "REG_FRAME_RELATED_EXPR", "REG_EH_CONTEXT", "REG_EH_REGION",
./rtl.h: REG_EH_CONTEXT,
***@linsvr3(9)% cat version.c
#include "version.h"

/* This is the string reported as the version number by all components
of the compiler. If you distribute a modified version of GCC,
please modify this string to indicate that, e.g. by putting your
organization's name in parentheses at the end of the string. */

const char version_string[] = "3.5.0 20040204 (experimental)";

/* This is the location of the online document giving instructions for
reporting bugs. If you distribute a modified version of GCC,
please change this to refer to a document giving instructions for
reporting bugs to you, not us. (You are of course welcome to
forward us bugs reported to you, if you determine that they are
not bugs in your modifications.) */

const char bug_report_url[] = "<URL:http://gcc.gnu.org/bugs.html>";
Richard Henderson
2004-02-05 18:31:40 UTC
Permalink
On Thu, Feb 05, 2004 at 01:38:25PM +0000, Joern Rennecke wrote:
> If STACK_REGS is defined, however, we want to sort REG_DEAD notes with
> a REG operand also on their REGNO.

Not needed, since the special importance of REG_DEAD for STACK_REGS
doesn't happen until reg-stack, and we can't run anything afterward.

> > > REG_EH_CONTEXT
> >
> > This must match, both in existance and value.
>
> I think it's dead.

Indeed. I was thinking of REG_EH_REGION.



r~
Joern Rennecke
2004-02-05 17:44:54 UTC
Permalink
> I'll look into putting it into cfgrtl.c

Hmm, after looking at the file headers again, I think cfgcleanup.c
is a better fit.
Ulrich Weigand
2004-02-05 13:34:46 UTC
Permalink
Joern Rennecke wrote:

>I noticed that the current cross-jumping code does only care about
>REG_EQUAL, REG_EQUIV, and some REG_DEAD notes, but I think the way it
>ignores all the other notes is generally unsafe.

Talking about unsafe: the current cross-jumping code also ignores
memory alias information, see:

http://gcc.gnu.org/ml/gcc/2004-02/msg00090.html

Bye,
Ulrich

--
Dr. Ulrich Weigand
***@informatik.uni-erlangen.de
Continue reading on narkive:
Loading...