I verified that the patch fixes the performance regression on intmm. I have
very well be noise.
Post by Bin ChengPR tree-optimization/62178
* tree-ssa-loop-ivopts.c (enum sel_type): New.
(iv_ca_add_use): Add parameter RELATED_P and find the best cand
for iv use if it's true.
(try_add_cand_for, get_initial_solution): Change paramter ORIGINALP
to SELECT_TYPE and handle it.
(find_optimal_iv_set_1): Ditto.
(try_prune_iv_set, find_optimal_iv_set_2): New functions.
(find_optimal_iv_set): Call find_optimal_iv_set_2 and choose the
best candidate set.
gcc/testsuite/ChangeLog
PR tree-optimization/62178
* gcc.target/aarch64/pr62178.c: New test.
Index: gcc/testsuite/gcc.target/aarch64/pr62178.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/pr62178.c (revision 0)
+++ gcc/testsuite/gcc.target/aarch64/pr62178.c (revision 0)
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+int a[30 +1][30 +1], b[30 +1][30 +1], r[30 +1][30 +1];
+
+void Intmm (int run) {
+ int i, j, k;
+
+ for ( i = 1; i <= 30; i++ )
+ for ( j = 1; j <= 30; j++ ) {
+ r[i][j] = 0;
+ for(k = 1; k <= 30; k++ )
+ r[i][j] += a[i][k]*b[k][j];
+ }
+}
+
+/* { dg-final { scan-assembler "ld1r\\t\{v\[0-9\]+\."} } */
Index: gcc/tree-ssa-loop-ivopts.c
===================================================================
--- gcc/tree-ssa-loop-ivopts.c (revision 215113)
+++ gcc/tree-ssa-loop-ivopts.c (working copy)
@@ -254,6 +254,14 @@ struct iv_inv_expr_ent
hashval_t hash;
};
+/* Types used to start selecting the candidate for each IV use. */
+enum sel_type
+{
+ SEL_ORIGINAL, /* Start selecting from original cands. */
+ SEL_IMPORTANT, /* Start selecting from important cands. */
+ SEL_RELATED /* Start selecting from related cands. */
+};
+
/* The data used by the induction variable optimizations. */
typedef struct iv_use *iv_use_p;
@@ -5417,22 +5425,51 @@ iv_ca_set_cp (struct ivopts_data *data, struct iv_
}
/* Extend set IVS by expressing USE by some of the candidates in it
- if possible. Consider all important candidates if candidates in
- set IVS don't give any result. */
+ if possible. If RELATED_P is FALSE, consider all important
+ candidates if candidates in set IVS don't give any result;
+ otherwise, try to find the best one from related or all candidates,
+ depending on consider_all_candidates. */
static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
- struct iv_use *use)
+ struct iv_use *use, bool related_p)
{
struct cost_pair *best_cp = NULL, *cp;
bitmap_iterator bi;
unsigned i;
struct iv_cand *cand;
- gcc_assert (ivs->upto >= use->id);
+ gcc_assert (ivs->upto == use->id);
ivs->upto++;
ivs->bad_uses++;
+ if (related_p)
+ {
+ if (data->consider_all_candidates)
+ {
+ for (i = 0; i < n_iv_cands (data); i++)
+ {
+ cand = iv_cand (data, i);
+ cp = get_use_iv_cost (data, use, cand);
+ if (cheaper_cost_pair (cp, best_cp))
+ best_cp = cp;
+ }
+ }
+ else
+ {
+ EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, i, bi)
+ {
+ cand = iv_cand (data, i);
+ cp = get_use_iv_cost (data, use, cand);
+ if (cheaper_cost_pair (cp, best_cp))
+ best_cp = cp;
+ }
+ }
+
+ iv_ca_set_cp (data, ivs, use, best_cp);
+ return;
+ }
+
EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
{
cand = iv_cand (data, i);
@@ -5440,7 +5477,7 @@ iv_ca_add_use (struct ivopts_data *data, struct iv
if (cheaper_cost_pair (cp, best_cp))
best_cp = cp;
}
-
+
if (best_cp == NULL)
{
EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
@@ -5869,13 +5906,15 @@ iv_ca_prune (struct ivopts_data *data, struct iv_c
}
/* Tries to extend the sets IVS in the best possible way in order
- to express the USE. If ORIGINALP is true, prefer candidates from
- the original set of IVs, otherwise favor important candidates not
- based on any memory object. */
+ to express the USE. If SELECT_TYPE is SEL_ORIGINAL, prefer
+ candidates from the original set of IVs; if it's SEL_IMPORTANT,
+ favor important candidates not based on any memory object;
+ if it's SEL_RELATED, assign the best candidate to each use if
+ possible. */
static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
- struct iv_use *use, bool originalp)
+ struct iv_use *use, enum sel_type select_type)
{
comp_cost best_cost, act_cost;
unsigned i;
@@ -5883,8 +5922,9 @@ try_add_cand_for (struct ivopts_data *data, struct
struct iv_cand *cand;
struct iv_ca_delta *best_delta = NULL, *act_delta;
struct cost_pair *cp;
+ bool originalp = (select_type == SEL_ORIGINAL);
- iv_ca_add_use (data, ivs, use);
+ iv_ca_add_use (data, ivs, use, select_type == SEL_RELATED);
best_cost = iv_ca_cost (ivs);
cp = iv_ca_cand_for_use (ivs, use);
if (cp)
@@ -5892,7 +5932,16 @@ try_add_cand_for (struct ivopts_data *data, struct
best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
iv_ca_set_no_cp (data, ivs, use);
}
+ if (select_type == SEL_RELATED)
+ {
+ /* We should have selected the best candidate if possible. */
+ gcc_assert (cp != NULL || infinite_cost_p (best_cost));
+ iv_ca_delta_commit (data, ivs, best_delta, true);
+ iv_ca_delta_free (&best_delta);
+ return !infinite_cost_p (best_cost);
+ }
+
/* If ORIGINALP is true, try to find the original IV for the use. Otherwise
first try important candidates not based on any memory object. Only if
this fails, try the specific ones. Rationale -- in loops with many
@@ -5983,16 +6032,17 @@ try_add_cand_for (struct ivopts_data *data, struct
return !infinite_cost_p (best_cost);
}
-/* Finds an initial assignment of candidates to uses. */
+/* Finds an initial assignment of candidates to uses according to
+ SELECT_TYPE. */
static struct iv_ca *
-get_initial_solution (struct ivopts_data *data, bool originalp)
+get_initial_solution (struct ivopts_data *data, enum sel_type select_type)
{
struct iv_ca *ivs = iv_ca_new (data);
unsigned i;
for (i = 0; i < n_iv_uses (data); i++)
- if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
+ if (!try_add_cand_for (data, ivs, iv_use (data, i), select_type))
{
iv_ca_free (&ivs);
return NULL;
@@ -6059,17 +6109,43 @@ try_improve_iv_set (struct ivopts_data *data, stru
return true;
}
+/* Tries to prune set of induction variables IVS. */
+
+static bool
+try_prune_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
+{
+ comp_cost new_cost, old_cost;
+ struct iv_ca_delta *delta = NULL;
+
+ if (iv_ca_n_cands (ivs) > ALWAYS_PRUNE_CAND_SET_BOUND)
+ return false;
+
+ old_cost = iv_ca_cost (ivs);
+ /* Try pruning the candidates from the set. */
+ new_cost = iv_ca_prune (data, ivs, NULL, &delta);
+
+ /* Nothing more we can do. */
+ if (!delta
+ || compare_costs (new_cost, old_cost) >= 0)
+ return false;
+
+ iv_ca_delta_commit (data, ivs, delta, true);
+ gcc_assert (compare_costs (new_cost, iv_ca_cost (ivs)) == 0);
+ iv_ca_delta_free (&delta);
+ return true;
+}
+
/* Attempts to find the optimal set of induction variables. We do simple
greedy heuristic -- we try to replace at most one candidate in the selected
solution and remove the unused ivs while this improves the cost. */
static struct iv_ca *
-find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
+find_optimal_iv_set_1 (struct ivopts_data *data, enum sel_type select_type)
{
struct iv_ca *set;
/* Get the initial solution. */
- set = get_initial_solution (data, originalp);
+ set = get_initial_solution (data, select_type);
if (!set)
{
if (dump_file && (dump_flags & TDF_DETAILS))
@@ -6095,25 +6171,72 @@ static struct iv_ca *
return set;
}
+/* Attempts to find the optimal set of induction variables. Different
+ with find_optimal_iv_set_1 -- this function uses greedy heuristic
+ that firstly assigns the best candidate to each use, then tries to
+ prune the solution. */
+
static struct iv_ca *
+find_optimal_iv_set_2 (struct ivopts_data *data, enum sel_type select_type)
+{
+ struct iv_ca *set;
+
+ /* Get the initial solution. */
+ set = get_initial_solution (data, select_type);
+ if (!set)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
+ return NULL;
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Initial set of candidates:\n");
+ iv_ca_dump (data, dump_file, set);
+ }
+
+ while (try_prune_iv_set (data, set))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Pruned to:\n");
+ iv_ca_dump (data, dump_file, set);
+ }
+ }
+
+ return set;
+}
+
+/* Find the optimal IV set by using two different heuristic strategies.
+ The first strategy starts with small IV solution, tries to replace at
+ most one candidate with others not in the current solution at one
+ time. The second strategy starts with large IV set, tries to replace
+ candidates with others in the current solution. */
+
+static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
unsigned i;
- struct iv_ca *set, *origset;
+ struct iv_ca *set, *origset, *pruneset;
struct iv_use *use;
- comp_cost cost, origcost;
+ comp_cost cost, origcost, prunecost;
- /* Determine the cost based on a strategy that starts with original IVs,
- and try again using a strategy that prefers candidates not based
- on any IVs. */
- origset = find_optimal_iv_set_1 (data, true);
- set = find_optimal_iv_set_1 (data, false);
+ /* Try to find optimal IV set using the first strategy and starting
+ with original IVS. */
+ origset = find_optimal_iv_set_1 (data, SEL_ORIGINAL);
+ /* Try to find optimal IV set using the first strategy and starting
+ with important IVS. */
+ set = find_optimal_iv_set_1 (data, SEL_IMPORTANT);
+ /* Try to find optimal IV set using the second strategy. */
+ pruneset = find_optimal_iv_set_2 (data, SEL_RELATED);
- if (!origset && !set)
+ if (!origset && !set && !pruneset)
return NULL;
origcost = origset ? iv_ca_cost (origset) : infinite_cost;
cost = set ? iv_ca_cost (set) : infinite_cost;
+ prunecost = pruneset ? iv_ca_cost (pruneset) : infinite_cost;
if (dump_file && (dump_flags & TDF_DETAILS))
{
@@ -6121,9 +6244,12 @@ find_optimal_iv_set (struct ivopts_data *data)
origcost.cost, origcost.complexity);
fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
cost.cost, cost.complexity);
+ fprintf (dump_file, "Pruned cost %d (complexity %d)\n\n",
+ prunecost.cost, prunecost.complexity);
}
- /* Choose the one with the best cost. */
+ /* Choose the one with the best cost among the three candidates. */
+
if (compare_costs (origcost, cost) <= 0)
{
if (set)
@@ -6133,6 +6259,15 @@ find_optimal_iv_set (struct ivopts_data *data)
else if (origset)
iv_ca_free (&origset);
+ if (compare_costs (prunecost, cost) < 0)
+ {
+ if (set)
+ iv_ca_free (&set);
+ set = pruneset;
+ }
+ else if (pruneset)
+ iv_ca_free (&pruneset);
+
for (i = 0; i < n_iv_uses (data); i++)
{
use = iv_use (data, i);