Ilya Verbin
2014-09-27 18:16:47 UTC
Hello,
This patch enables the streaming of LTO bytecode, needed by offload target,
using existing LTO infrastructure. It creates new prefix for the section names
(.gnu.target_lto_) and streams out the functions and variables with "omp declare
target" attribute, including the functions for outlined '#pragma omp target'
regions. The offload compiler (under ifdef ACCEL_COMPILER) reads and compiles
these new sections.
But I have doubts regarding the offload_lto_mode switch. Why I added it:
The outlined target regions (say omp_fn0) contains references from the parent
functions. And that's correct for the case when we stream out the host-side
version of omp_fn0. But for the target version there are no parent functions,
node->used_from_other_partition gets incorrect value (always 1), and offload
compiler crashes on streaming in.
Another solution is to remain referenced_from_other_partition_p and
reachable_from_other_partition_p unchanged, then used_from_other_partition will
have incorrect value for target regions, but the offload compiler will just
ignore it. Which approach is better?
Anyway, now it's bootstrapped and regtested on i686-linux and x86_64-linux.
2014-09-27 Ilya Verbin <***@intel.com>
Ilya Tocar <***@intel.com>
Andrey Turetskiy <***@intel.com>
Bernd Schmidt <***@codesourcery.com>
gcc/
* cgraph.h (symtab_node): Add need_dump flag.
* cgraphunit.c: Include lto-section-names.h.
(initialize_offload): New function.
(ipa_passes): Initialize offload and call ipa_write_summaries if there
is something to write to OMP_SECTION_NAME_PREFIX sections.
(symbol_table::compile): Call lto_streamer_hooks_init under flag_openmp.
* ipa-inline-analysis.c (inline_generate_summary): Do not exit under
flag_openmp.
(inline_free_summary): Always remove hooks.
* lto-cgraph.c (lto_set_symtab_encoder_in_partition): Exit if there is
no need to encode the node.
(referenced_from_other_partition_p, reachable_from_other_partition_p):
Ignore references from non-target functions to target functions if we
are streaming out target-side bytecode (offload lto mode).
(select_what_to_dump): New function.
* lto-section-names.h (OMP_SECTION_NAME_PREFIX): Define.
(section_name_prefix): Declare.
* lto-streamer.c (offload_lto_mode): New variable.
(section_name_prefix): New variable.
(lto_get_section_name): Use section_name_prefix instead of
LTO_SECTION_NAME_PREFIX.
* lto-streamer.h (select_what_to_dump): Declare.
(offload_lto_mode): Declare.
* omp-low.c (is_targetreg_ctx): New function.
(create_omp_child_function, check_omp_nesting_restrictions): Use it.
(expand_omp_target): Set mark_force_output for the target functions.
(lower_omp_critical): Add target attribute for omp critical symbol.
* passes.c (ipa_write_summaries): Call select_what_to_dump.
gcc/lto/
* lto-object.c (lto_obj_add_section): Use section_name_prefix instead of
LTO_SECTION_NAME_PREFIX.
* lto-partition.c (add_symbol_to_partition_1): Always set
node->need_dump to true.
(lto_promote_cross_file_statics): Call select_what_to_dump.
* lto.c (lto_section_with_id): Use section_name_prefix instead of
LTO_SECTION_NAME_PREFIX.
(read_cgraph_and_symbols): Read OMP_SECTION_NAME_PREFIX sections, if
being built as an offload compiler.
Thanks,
-- Ilya
---
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 7481906..9ab970d 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -444,6 +444,11 @@ public:
/* Set when init priority is set. */
unsigned in_init_priority_hash : 1;
+ /* Set when symbol needs to be dumped into LTO bytecode for LTO,
+ or in pragma omp target case, for separate compilation targeting
+ a different architecture. */
+ unsigned need_dump : 1;
+
/* Ordering of all symtab entries. */
int order;
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index b854e4b..4ab4c57 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -211,6 +211,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-nested.h"
#include "gimplify.h"
#include "dbgcnt.h"
+#include "lto-section-names.h"
/* Queue of cgraph nodes scheduled to be added into cgraph. This is a
secondary queue used during optimization to accommodate passes that
@@ -1994,9 +1995,40 @@ output_in_order (bool no_reorder)
free (nodes);
}
+/* Check whether there is at least one function or global variable to offload.
+ */
+
+static bool
+initialize_offload (void)
+{
+ bool have_offload = false;
+ struct cgraph_node *node;
+ struct varpool_node *vnode;
+
+ FOR_EACH_DEFINED_FUNCTION (node)
+ if (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (node->decl)))
+ {
+ have_offload = true;
+ break;
+ }
+
+ FOR_EACH_DEFINED_VARIABLE (vnode)
+ {
+ if (!lookup_attribute ("omp declare target",
+ DECL_ATTRIBUTES (vnode->decl))
+ || TREE_CODE (vnode->decl) != VAR_DECL
+ || DECL_SIZE (vnode->decl) == 0)
+ continue;
+ have_offload = true;
+ }
+
+ return have_offload;
+}
+
static void
ipa_passes (void)
{
+ bool have_offload = false;
gcc::pass_manager *passes = g->get_passes ();
set_cfun (NULL);
@@ -2004,6 +2036,14 @@ ipa_passes (void)
gimple_register_cfg_hooks ();
bitmap_obstack_initialize (NULL);
+ if (!in_lto_p && flag_openmp)
+ {
+ have_offload = initialize_offload ();
+ /* OpenMP offloading requires LTO infrastructure. */
+ if (have_offload)
+ flag_generate_lto = 1;
+ }
+
invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_START, NULL);
if (!in_lto_p)
@@ -2041,7 +2081,20 @@ ipa_passes (void)
targetm.asm_out.lto_start ();
if (!in_lto_p)
- ipa_write_summaries ();
+ {
+ if (have_offload)
+ {
+ offload_lto_mode = true;
+ section_name_prefix = OMP_SECTION_NAME_PREFIX;
+ ipa_write_summaries ();
+ }
+ if (flag_lto)
+ {
+ offload_lto_mode = false;
+ section_name_prefix = LTO_SECTION_NAME_PREFIX;
+ ipa_write_summaries ();
+ }
+ }
if (flag_generate_lto)
targetm.asm_out.lto_end ();
@@ -2122,7 +2175,7 @@ symbol_table::compile (void)
state = IPA;
/* If LTO is enabled, initialize the streamer hooks needed by GIMPLE. */
- if (flag_lto)
+ if (flag_lto || flag_openmp)
lto_streamer_hooks_init ();
/* Don't run the IPA passes if there was any error or sorry messages. */
diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c
index 38f56d2..076a1e8 100644
--- a/gcc/ipa-inline-analysis.c
+++ b/gcc/ipa-inline-analysis.c
@@ -4010,7 +4010,7 @@ inline_generate_summary (void)
/* When not optimizing, do not bother to analyze. Inlining is still done
because edge redirection needs to happen there. */
- if (!optimize && !flag_lto && !flag_wpa)
+ if (!optimize && !flag_lto && !flag_wpa && !flag_openmp)
return;
function_insertion_hook_holder =
@@ -4325,11 +4325,6 @@ void
inline_free_summary (void)
{
struct cgraph_node *node;
- if (!inline_edge_summary_vec.exists ())
- return;
- FOR_EACH_DEFINED_FUNCTION (node)
- if (!node->alias)
- reset_inline_summary (node);
if (function_insertion_hook_holder)
symtab->remove_cgraph_insertion_hook (function_insertion_hook_holder);
function_insertion_hook_holder = NULL;
@@ -4345,6 +4340,11 @@ inline_free_summary (void)
if (edge_duplication_hook_holder)
symtab->remove_edge_duplication_hook (edge_duplication_hook_holder);
edge_duplication_hook_holder = NULL;
+ if (!inline_edge_summary_vec.exists ())
+ return;
+ FOR_EACH_DEFINED_FUNCTION (node)
+ if (!node->alias)
+ reset_inline_summary (node);
vec_free (inline_summary_vec);
inline_edge_summary_vec.release ();
if (edge_predicate_pool)
diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
index 0584946..78b7fc8 100644
--- a/gcc/lto-cgraph.c
+++ b/gcc/lto-cgraph.c
@@ -239,6 +239,9 @@ void
lto_set_symtab_encoder_in_partition (lto_symtab_encoder_t encoder,
symtab_node *node)
{
+ /* Ignore not needed nodes. */
+ if (!node->need_dump)
+ return;
int index = lto_symtab_encoder_encode (encoder, node);
encoder->nodes[index].in_partition = true;
}
@@ -321,6 +324,12 @@ referenced_from_other_partition_p (symtab_node *node, lto_symtab_encoder_t encod
for (i = 0; node->iterate_referring (i, ref); i++)
{
+ /* Ignore references from non-target functions in offload lto mode. */
+ if (offload_lto_mode
+ && !lookup_attribute ("omp declare target",
+ DECL_ATTRIBUTES (ref->referring->decl)))
+ continue;
+
if (ref->referring->in_other_partition
|| !lto_symtab_encoder_in_partition_p (encoder, ref->referring))
return true;
@@ -339,9 +348,17 @@ reachable_from_other_partition_p (struct cgraph_node *node, lto_symtab_encoder_t
if (node->global.inlined_to)
return false;
for (e = node->callers; e; e = e->next_caller)
- if (e->caller->in_other_partition
- || !lto_symtab_encoder_in_partition_p (encoder, e->caller))
- return true;
+ {
+ /* Ignore references from non-target functions in offload lto mode. */
+ if (offload_lto_mode
+ && !lookup_attribute ("omp declare target",
+ DECL_ATTRIBUTES (e->caller->decl)))
+ continue;
+
+ if (e->caller->in_other_partition
+ || !lto_symtab_encoder_in_partition_p (encoder, e->caller))
+ return true;
+ }
return false;
}
@@ -802,6 +819,18 @@ create_references (lto_symtab_encoder_t encoder, symtab_node *node)
lto_symtab_encoder_encode (encoder, ref->referred);
}
+/* Select what needs to be streamed out. In regular lto mode stream everything.
+ In offload lto mode stream only stuff marked with an attribute. */
+void
+select_what_to_dump (void)
+{
+ struct symtab_node *snode;
+ FOR_EACH_SYMBOL (snode)
+ snode->need_dump = !offload_lto_mode
+ || lookup_attribute ("omp declare target",
+ DECL_ATTRIBUTES (snode->decl));
+}
+
/* Find all symbols we want to stream into given partition and insert them
to encoders.
diff --git a/gcc/lto-section-names.h b/gcc/lto-section-names.h
index cb75230..06d2caf 100644
--- a/gcc/lto-section-names.h
+++ b/gcc/lto-section-names.h
@@ -25,6 +25,11 @@ along with GCC; see the file COPYING3. If not see
name for the functions and static_initializers. For other types of
sections a '.' and the section type are appended. */
#define LTO_SECTION_NAME_PREFIX ".gnu.lto_"
+#define OMP_SECTION_NAME_PREFIX ".gnu.target_lto_"
+
+/* Can be either OMP_SECTION_NAME_PREFIX when we stream 'pragma omp target'
+ stuff, or LTO_SECTION_NAME_PREFIX for LTO case. */
+extern const char *section_name_prefix;
/* Segment name for LTO sections. This is only used for Mach-O. */
diff --git a/gcc/lto-streamer.c b/gcc/lto-streamer.c
index 3480723..95232f9 100644
--- a/gcc/lto-streamer.c
+++ b/gcc/lto-streamer.c
@@ -48,6 +48,8 @@ struct lto_stats_d lto_stats;
static bitmap_obstack lto_obstack;
static bool lto_obstack_initialized;
+bool offload_lto_mode = false;
+const char *section_name_prefix = LTO_SECTION_NAME_PREFIX;
/* Return a string representing LTO tag TAG. */
@@ -177,7 +179,7 @@ lto_get_section_name (int section_type, const char *name, struct lto_file_decl_d
sprintf (post, "." HOST_WIDE_INT_PRINT_HEX_PURE, f->id);
else
sprintf (post, "." HOST_WIDE_INT_PRINT_HEX_PURE, get_random_seed (false));
- return concat (LTO_SECTION_NAME_PREFIX, sep, add, post, NULL);
+ return concat (section_name_prefix, sep, add, post, NULL);
}
diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
index 4bec969..0016eef 100644
--- a/gcc/lto-streamer.h
+++ b/gcc/lto-streamer.h
@@ -831,6 +831,7 @@ bool referenced_from_this_partition_p (symtab_node *,
bool reachable_from_this_partition_p (struct cgraph_node *,
lto_symtab_encoder_t);
lto_symtab_encoder_t compute_ltrans_boundary (lto_symtab_encoder_t encoder);
+void select_what_to_dump (void);
/* In lto-symtab.c. */
@@ -846,6 +847,9 @@ extern void lto_write_options (void);
/* Statistics gathered during LTO, WPA and LTRANS. */
extern struct lto_stats_d lto_stats;
+/* Regular or offload mode of LTO. */
+extern bool offload_lto_mode;
+
/* Section names corresponding to the values of enum lto_section_type. */
extern const char *lto_section_name[];
diff --git a/gcc/lto/lto-object.c b/gcc/lto/lto-object.c
index 323f7b2..4ee752f 100644
--- a/gcc/lto/lto-object.c
+++ b/gcc/lto/lto-object.c
@@ -230,8 +230,7 @@ lto_obj_add_section (void *data, const char *name, off_t offset,
void **slot;
struct lto_section_list *list = loasd->list;
- if (strncmp (name, LTO_SECTION_NAME_PREFIX,
- strlen (LTO_SECTION_NAME_PREFIX)) != 0)
+ if (strncmp (name, section_name_prefix, strlen (section_name_prefix)))
return 1;
new_name = xstrdup (name);
diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
index 0451a66..332562f 100644
--- a/gcc/lto/lto-partition.c
+++ b/gcc/lto/lto-partition.c
@@ -134,6 +134,7 @@ add_symbol_to_partition_1 (ltrans_partition part, symtab_node *node)
gcc_assert (c != SYMBOL_EXTERNAL
&& (c == SYMBOL_DUPLICATE || !symbol_partitioned_p (node)));
+ node->need_dump = true;
lto_set_symtab_encoder_in_partition (part->encoder, node);
if (symbol_partitioned_p (node))
@@ -920,6 +921,8 @@ lto_promote_cross_file_statics (void)
gcc_assert (flag_wpa);
+ select_what_to_dump ();
+
/* First compute boundaries. */
n_sets = ltrans_partitions.length ();
for (i = 0; i < n_sets; i++)
diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
index 6cbb178..f23d997 100644
--- a/gcc/lto/lto.c
+++ b/gcc/lto/lto.c
@@ -2125,7 +2125,7 @@ lto_section_with_id (const char *name, unsigned HOST_WIDE_INT *id)
{
const char *s;
- if (strncmp (name, LTO_SECTION_NAME_PREFIX, strlen (LTO_SECTION_NAME_PREFIX)))
+ if (strncmp (name, section_name_prefix, strlen (section_name_prefix)))
return 0;
s = strrchr (name, '.');
return s && sscanf (s, "." HOST_WIDE_INT_PRINT_HEX_PURE, id) == 1;
@@ -2899,6 +2899,10 @@ read_cgraph_and_symbols (unsigned nfiles, const char **fnames)
timevar_push (TV_IPA_LTO_DECL_IN);
+#ifdef ACCEL_COMPILER
+ section_name_prefix = OMP_SECTION_NAME_PREFIX;
+#endif
+
real_file_decl_data
= decl_data = ggc_cleared_vec_alloc<lto_file_decl_data_ptr> (nfiles + 1);
real_file_count = nfiles;
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 82651ea..7d587b3 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -257,6 +257,16 @@ is_parallel_ctx (omp_context *ctx)
}
+/* Return true if CTX is for an omp target region. */
+
+static inline bool
+is_targetreg_ctx (omp_context *ctx)
+{
+ return gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET
+ && gimple_omp_target_kind (ctx->stmt) == GF_OMP_TARGET_KIND_REGION;
+}
+
+
/* Return true if CTX is for an omp task. */
static inline bool
@@ -1930,9 +1940,7 @@ create_omp_child_function (omp_context *ctx, bool task_copy)
{
omp_context *octx;
for (octx = ctx; octx; octx = octx->outer)
- if (gimple_code (octx->stmt) == GIMPLE_OMP_TARGET
- && gimple_omp_target_kind (octx->stmt)
- == GF_OMP_TARGET_KIND_REGION)
+ if (is_targetreg_ctx (octx))
{
target_p = true;
break;
@@ -2588,8 +2596,7 @@ check_omp_nesting_restrictions (gimple stmt, omp_context *ctx)
break;
case GIMPLE_OMP_TARGET:
for (; ctx != NULL; ctx = ctx->outer)
- if (gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET
- && gimple_omp_target_kind (ctx->stmt) == GF_OMP_TARGET_KIND_REGION)
+ if (is_targetreg_ctx (ctx))
{
const char *name;
switch (gimple_omp_target_kind (stmt))
@@ -8206,6 +8213,7 @@ expand_omp_target (struct omp_region *region)
if (kind == GF_OMP_TARGET_KIND_REGION)
{
unsigned srcidx, dstidx, num;
+ struct cgraph_node *node;
/* If the target region needs data sent from the parent
function, then the very first statement (except possible
@@ -8337,6 +8345,11 @@ expand_omp_target (struct omp_region *region)
push_cfun (child_cfun);
cgraph_edge::rebuild_edges ();
+ /* Prevent IPA from removing child_fn as unreachable, since there are no
+ refs from the parent function to the target side child_fn. */
+ node = cgraph_node::get (child_fn);
+ node->mark_force_output ();
+
/* Some EH regions might become dead, see PR34608. If
pass_cleanup_cfg isn't the first pass to happen with the
new child, these dead EH edges might cause problems.
@@ -9207,6 +9220,19 @@ lower_omp_critical (gimple_stmt_iterator *gsi_p, omp_context *ctx)
DECL_COMMON (decl) = 1;
DECL_ARTIFICIAL (decl) = 1;
DECL_IGNORED_P (decl) = 1;
+
+ /* If '#pragma omp critical' is inside target region, the symbol must
+ have an 'omp declare target' attribute. */
+ omp_context *octx;
+ for (octx = ctx->outer; octx; octx = octx->outer)
+ if (is_targetreg_ctx (octx))
+ {
+ DECL_ATTRIBUTES (decl)
+ = tree_cons (get_identifier ("omp declare target"),
+ NULL_TREE, DECL_ATTRIBUTES (decl));
+ break;
+ }
+
varpool_node::finalize_decl (decl);
splay_tree_insert (critical_name_mutexes, (splay_tree_key) name,
diff --git a/gcc/passes.c b/gcc/passes.c
index 5001c3d..d63c913 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -2308,6 +2308,8 @@ ipa_write_summaries (void)
if (!flag_generate_lto || seen_error ())
return;
+ select_what_to_dump ();
+
encoder = lto_symtab_encoder_new (false);
/* Create the callgraph set in the same order used in
This patch enables the streaming of LTO bytecode, needed by offload target,
using existing LTO infrastructure. It creates new prefix for the section names
(.gnu.target_lto_) and streams out the functions and variables with "omp declare
target" attribute, including the functions for outlined '#pragma omp target'
regions. The offload compiler (under ifdef ACCEL_COMPILER) reads and compiles
these new sections.
But I have doubts regarding the offload_lto_mode switch. Why I added it:
The outlined target regions (say omp_fn0) contains references from the parent
functions. And that's correct for the case when we stream out the host-side
version of omp_fn0. But for the target version there are no parent functions,
node->used_from_other_partition gets incorrect value (always 1), and offload
compiler crashes on streaming in.
Another solution is to remain referenced_from_other_partition_p and
reachable_from_other_partition_p unchanged, then used_from_other_partition will
have incorrect value for target regions, but the offload compiler will just
ignore it. Which approach is better?
Anyway, now it's bootstrapped and regtested on i686-linux and x86_64-linux.
2014-09-27 Ilya Verbin <***@intel.com>
Ilya Tocar <***@intel.com>
Andrey Turetskiy <***@intel.com>
Bernd Schmidt <***@codesourcery.com>
gcc/
* cgraph.h (symtab_node): Add need_dump flag.
* cgraphunit.c: Include lto-section-names.h.
(initialize_offload): New function.
(ipa_passes): Initialize offload and call ipa_write_summaries if there
is something to write to OMP_SECTION_NAME_PREFIX sections.
(symbol_table::compile): Call lto_streamer_hooks_init under flag_openmp.
* ipa-inline-analysis.c (inline_generate_summary): Do not exit under
flag_openmp.
(inline_free_summary): Always remove hooks.
* lto-cgraph.c (lto_set_symtab_encoder_in_partition): Exit if there is
no need to encode the node.
(referenced_from_other_partition_p, reachable_from_other_partition_p):
Ignore references from non-target functions to target functions if we
are streaming out target-side bytecode (offload lto mode).
(select_what_to_dump): New function.
* lto-section-names.h (OMP_SECTION_NAME_PREFIX): Define.
(section_name_prefix): Declare.
* lto-streamer.c (offload_lto_mode): New variable.
(section_name_prefix): New variable.
(lto_get_section_name): Use section_name_prefix instead of
LTO_SECTION_NAME_PREFIX.
* lto-streamer.h (select_what_to_dump): Declare.
(offload_lto_mode): Declare.
* omp-low.c (is_targetreg_ctx): New function.
(create_omp_child_function, check_omp_nesting_restrictions): Use it.
(expand_omp_target): Set mark_force_output for the target functions.
(lower_omp_critical): Add target attribute for omp critical symbol.
* passes.c (ipa_write_summaries): Call select_what_to_dump.
gcc/lto/
* lto-object.c (lto_obj_add_section): Use section_name_prefix instead of
LTO_SECTION_NAME_PREFIX.
* lto-partition.c (add_symbol_to_partition_1): Always set
node->need_dump to true.
(lto_promote_cross_file_statics): Call select_what_to_dump.
* lto.c (lto_section_with_id): Use section_name_prefix instead of
LTO_SECTION_NAME_PREFIX.
(read_cgraph_and_symbols): Read OMP_SECTION_NAME_PREFIX sections, if
being built as an offload compiler.
Thanks,
-- Ilya
---
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 7481906..9ab970d 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -444,6 +444,11 @@ public:
/* Set when init priority is set. */
unsigned in_init_priority_hash : 1;
+ /* Set when symbol needs to be dumped into LTO bytecode for LTO,
+ or in pragma omp target case, for separate compilation targeting
+ a different architecture. */
+ unsigned need_dump : 1;
+
/* Ordering of all symtab entries. */
int order;
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index b854e4b..4ab4c57 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -211,6 +211,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-nested.h"
#include "gimplify.h"
#include "dbgcnt.h"
+#include "lto-section-names.h"
/* Queue of cgraph nodes scheduled to be added into cgraph. This is a
secondary queue used during optimization to accommodate passes that
@@ -1994,9 +1995,40 @@ output_in_order (bool no_reorder)
free (nodes);
}
+/* Check whether there is at least one function or global variable to offload.
+ */
+
+static bool
+initialize_offload (void)
+{
+ bool have_offload = false;
+ struct cgraph_node *node;
+ struct varpool_node *vnode;
+
+ FOR_EACH_DEFINED_FUNCTION (node)
+ if (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (node->decl)))
+ {
+ have_offload = true;
+ break;
+ }
+
+ FOR_EACH_DEFINED_VARIABLE (vnode)
+ {
+ if (!lookup_attribute ("omp declare target",
+ DECL_ATTRIBUTES (vnode->decl))
+ || TREE_CODE (vnode->decl) != VAR_DECL
+ || DECL_SIZE (vnode->decl) == 0)
+ continue;
+ have_offload = true;
+ }
+
+ return have_offload;
+}
+
static void
ipa_passes (void)
{
+ bool have_offload = false;
gcc::pass_manager *passes = g->get_passes ();
set_cfun (NULL);
@@ -2004,6 +2036,14 @@ ipa_passes (void)
gimple_register_cfg_hooks ();
bitmap_obstack_initialize (NULL);
+ if (!in_lto_p && flag_openmp)
+ {
+ have_offload = initialize_offload ();
+ /* OpenMP offloading requires LTO infrastructure. */
+ if (have_offload)
+ flag_generate_lto = 1;
+ }
+
invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_START, NULL);
if (!in_lto_p)
@@ -2041,7 +2081,20 @@ ipa_passes (void)
targetm.asm_out.lto_start ();
if (!in_lto_p)
- ipa_write_summaries ();
+ {
+ if (have_offload)
+ {
+ offload_lto_mode = true;
+ section_name_prefix = OMP_SECTION_NAME_PREFIX;
+ ipa_write_summaries ();
+ }
+ if (flag_lto)
+ {
+ offload_lto_mode = false;
+ section_name_prefix = LTO_SECTION_NAME_PREFIX;
+ ipa_write_summaries ();
+ }
+ }
if (flag_generate_lto)
targetm.asm_out.lto_end ();
@@ -2122,7 +2175,7 @@ symbol_table::compile (void)
state = IPA;
/* If LTO is enabled, initialize the streamer hooks needed by GIMPLE. */
- if (flag_lto)
+ if (flag_lto || flag_openmp)
lto_streamer_hooks_init ();
/* Don't run the IPA passes if there was any error or sorry messages. */
diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c
index 38f56d2..076a1e8 100644
--- a/gcc/ipa-inline-analysis.c
+++ b/gcc/ipa-inline-analysis.c
@@ -4010,7 +4010,7 @@ inline_generate_summary (void)
/* When not optimizing, do not bother to analyze. Inlining is still done
because edge redirection needs to happen there. */
- if (!optimize && !flag_lto && !flag_wpa)
+ if (!optimize && !flag_lto && !flag_wpa && !flag_openmp)
return;
function_insertion_hook_holder =
@@ -4325,11 +4325,6 @@ void
inline_free_summary (void)
{
struct cgraph_node *node;
- if (!inline_edge_summary_vec.exists ())
- return;
- FOR_EACH_DEFINED_FUNCTION (node)
- if (!node->alias)
- reset_inline_summary (node);
if (function_insertion_hook_holder)
symtab->remove_cgraph_insertion_hook (function_insertion_hook_holder);
function_insertion_hook_holder = NULL;
@@ -4345,6 +4340,11 @@ inline_free_summary (void)
if (edge_duplication_hook_holder)
symtab->remove_edge_duplication_hook (edge_duplication_hook_holder);
edge_duplication_hook_holder = NULL;
+ if (!inline_edge_summary_vec.exists ())
+ return;
+ FOR_EACH_DEFINED_FUNCTION (node)
+ if (!node->alias)
+ reset_inline_summary (node);
vec_free (inline_summary_vec);
inline_edge_summary_vec.release ();
if (edge_predicate_pool)
diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
index 0584946..78b7fc8 100644
--- a/gcc/lto-cgraph.c
+++ b/gcc/lto-cgraph.c
@@ -239,6 +239,9 @@ void
lto_set_symtab_encoder_in_partition (lto_symtab_encoder_t encoder,
symtab_node *node)
{
+ /* Ignore not needed nodes. */
+ if (!node->need_dump)
+ return;
int index = lto_symtab_encoder_encode (encoder, node);
encoder->nodes[index].in_partition = true;
}
@@ -321,6 +324,12 @@ referenced_from_other_partition_p (symtab_node *node, lto_symtab_encoder_t encod
for (i = 0; node->iterate_referring (i, ref); i++)
{
+ /* Ignore references from non-target functions in offload lto mode. */
+ if (offload_lto_mode
+ && !lookup_attribute ("omp declare target",
+ DECL_ATTRIBUTES (ref->referring->decl)))
+ continue;
+
if (ref->referring->in_other_partition
|| !lto_symtab_encoder_in_partition_p (encoder, ref->referring))
return true;
@@ -339,9 +348,17 @@ reachable_from_other_partition_p (struct cgraph_node *node, lto_symtab_encoder_t
if (node->global.inlined_to)
return false;
for (e = node->callers; e; e = e->next_caller)
- if (e->caller->in_other_partition
- || !lto_symtab_encoder_in_partition_p (encoder, e->caller))
- return true;
+ {
+ /* Ignore references from non-target functions in offload lto mode. */
+ if (offload_lto_mode
+ && !lookup_attribute ("omp declare target",
+ DECL_ATTRIBUTES (e->caller->decl)))
+ continue;
+
+ if (e->caller->in_other_partition
+ || !lto_symtab_encoder_in_partition_p (encoder, e->caller))
+ return true;
+ }
return false;
}
@@ -802,6 +819,18 @@ create_references (lto_symtab_encoder_t encoder, symtab_node *node)
lto_symtab_encoder_encode (encoder, ref->referred);
}
+/* Select what needs to be streamed out. In regular lto mode stream everything.
+ In offload lto mode stream only stuff marked with an attribute. */
+void
+select_what_to_dump (void)
+{
+ struct symtab_node *snode;
+ FOR_EACH_SYMBOL (snode)
+ snode->need_dump = !offload_lto_mode
+ || lookup_attribute ("omp declare target",
+ DECL_ATTRIBUTES (snode->decl));
+}
+
/* Find all symbols we want to stream into given partition and insert them
to encoders.
diff --git a/gcc/lto-section-names.h b/gcc/lto-section-names.h
index cb75230..06d2caf 100644
--- a/gcc/lto-section-names.h
+++ b/gcc/lto-section-names.h
@@ -25,6 +25,11 @@ along with GCC; see the file COPYING3. If not see
name for the functions and static_initializers. For other types of
sections a '.' and the section type are appended. */
#define LTO_SECTION_NAME_PREFIX ".gnu.lto_"
+#define OMP_SECTION_NAME_PREFIX ".gnu.target_lto_"
+
+/* Can be either OMP_SECTION_NAME_PREFIX when we stream 'pragma omp target'
+ stuff, or LTO_SECTION_NAME_PREFIX for LTO case. */
+extern const char *section_name_prefix;
/* Segment name for LTO sections. This is only used for Mach-O. */
diff --git a/gcc/lto-streamer.c b/gcc/lto-streamer.c
index 3480723..95232f9 100644
--- a/gcc/lto-streamer.c
+++ b/gcc/lto-streamer.c
@@ -48,6 +48,8 @@ struct lto_stats_d lto_stats;
static bitmap_obstack lto_obstack;
static bool lto_obstack_initialized;
+bool offload_lto_mode = false;
+const char *section_name_prefix = LTO_SECTION_NAME_PREFIX;
/* Return a string representing LTO tag TAG. */
@@ -177,7 +179,7 @@ lto_get_section_name (int section_type, const char *name, struct lto_file_decl_d
sprintf (post, "." HOST_WIDE_INT_PRINT_HEX_PURE, f->id);
else
sprintf (post, "." HOST_WIDE_INT_PRINT_HEX_PURE, get_random_seed (false));
- return concat (LTO_SECTION_NAME_PREFIX, sep, add, post, NULL);
+ return concat (section_name_prefix, sep, add, post, NULL);
}
diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
index 4bec969..0016eef 100644
--- a/gcc/lto-streamer.h
+++ b/gcc/lto-streamer.h
@@ -831,6 +831,7 @@ bool referenced_from_this_partition_p (symtab_node *,
bool reachable_from_this_partition_p (struct cgraph_node *,
lto_symtab_encoder_t);
lto_symtab_encoder_t compute_ltrans_boundary (lto_symtab_encoder_t encoder);
+void select_what_to_dump (void);
/* In lto-symtab.c. */
@@ -846,6 +847,9 @@ extern void lto_write_options (void);
/* Statistics gathered during LTO, WPA and LTRANS. */
extern struct lto_stats_d lto_stats;
+/* Regular or offload mode of LTO. */
+extern bool offload_lto_mode;
+
/* Section names corresponding to the values of enum lto_section_type. */
extern const char *lto_section_name[];
diff --git a/gcc/lto/lto-object.c b/gcc/lto/lto-object.c
index 323f7b2..4ee752f 100644
--- a/gcc/lto/lto-object.c
+++ b/gcc/lto/lto-object.c
@@ -230,8 +230,7 @@ lto_obj_add_section (void *data, const char *name, off_t offset,
void **slot;
struct lto_section_list *list = loasd->list;
- if (strncmp (name, LTO_SECTION_NAME_PREFIX,
- strlen (LTO_SECTION_NAME_PREFIX)) != 0)
+ if (strncmp (name, section_name_prefix, strlen (section_name_prefix)))
return 1;
new_name = xstrdup (name);
diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
index 0451a66..332562f 100644
--- a/gcc/lto/lto-partition.c
+++ b/gcc/lto/lto-partition.c
@@ -134,6 +134,7 @@ add_symbol_to_partition_1 (ltrans_partition part, symtab_node *node)
gcc_assert (c != SYMBOL_EXTERNAL
&& (c == SYMBOL_DUPLICATE || !symbol_partitioned_p (node)));
+ node->need_dump = true;
lto_set_symtab_encoder_in_partition (part->encoder, node);
if (symbol_partitioned_p (node))
@@ -920,6 +921,8 @@ lto_promote_cross_file_statics (void)
gcc_assert (flag_wpa);
+ select_what_to_dump ();
+
/* First compute boundaries. */
n_sets = ltrans_partitions.length ();
for (i = 0; i < n_sets; i++)
diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
index 6cbb178..f23d997 100644
--- a/gcc/lto/lto.c
+++ b/gcc/lto/lto.c
@@ -2125,7 +2125,7 @@ lto_section_with_id (const char *name, unsigned HOST_WIDE_INT *id)
{
const char *s;
- if (strncmp (name, LTO_SECTION_NAME_PREFIX, strlen (LTO_SECTION_NAME_PREFIX)))
+ if (strncmp (name, section_name_prefix, strlen (section_name_prefix)))
return 0;
s = strrchr (name, '.');
return s && sscanf (s, "." HOST_WIDE_INT_PRINT_HEX_PURE, id) == 1;
@@ -2899,6 +2899,10 @@ read_cgraph_and_symbols (unsigned nfiles, const char **fnames)
timevar_push (TV_IPA_LTO_DECL_IN);
+#ifdef ACCEL_COMPILER
+ section_name_prefix = OMP_SECTION_NAME_PREFIX;
+#endif
+
real_file_decl_data
= decl_data = ggc_cleared_vec_alloc<lto_file_decl_data_ptr> (nfiles + 1);
real_file_count = nfiles;
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 82651ea..7d587b3 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -257,6 +257,16 @@ is_parallel_ctx (omp_context *ctx)
}
+/* Return true if CTX is for an omp target region. */
+
+static inline bool
+is_targetreg_ctx (omp_context *ctx)
+{
+ return gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET
+ && gimple_omp_target_kind (ctx->stmt) == GF_OMP_TARGET_KIND_REGION;
+}
+
+
/* Return true if CTX is for an omp task. */
static inline bool
@@ -1930,9 +1940,7 @@ create_omp_child_function (omp_context *ctx, bool task_copy)
{
omp_context *octx;
for (octx = ctx; octx; octx = octx->outer)
- if (gimple_code (octx->stmt) == GIMPLE_OMP_TARGET
- && gimple_omp_target_kind (octx->stmt)
- == GF_OMP_TARGET_KIND_REGION)
+ if (is_targetreg_ctx (octx))
{
target_p = true;
break;
@@ -2588,8 +2596,7 @@ check_omp_nesting_restrictions (gimple stmt, omp_context *ctx)
break;
case GIMPLE_OMP_TARGET:
for (; ctx != NULL; ctx = ctx->outer)
- if (gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET
- && gimple_omp_target_kind (ctx->stmt) == GF_OMP_TARGET_KIND_REGION)
+ if (is_targetreg_ctx (ctx))
{
const char *name;
switch (gimple_omp_target_kind (stmt))
@@ -8206,6 +8213,7 @@ expand_omp_target (struct omp_region *region)
if (kind == GF_OMP_TARGET_KIND_REGION)
{
unsigned srcidx, dstidx, num;
+ struct cgraph_node *node;
/* If the target region needs data sent from the parent
function, then the very first statement (except possible
@@ -8337,6 +8345,11 @@ expand_omp_target (struct omp_region *region)
push_cfun (child_cfun);
cgraph_edge::rebuild_edges ();
+ /* Prevent IPA from removing child_fn as unreachable, since there are no
+ refs from the parent function to the target side child_fn. */
+ node = cgraph_node::get (child_fn);
+ node->mark_force_output ();
+
/* Some EH regions might become dead, see PR34608. If
pass_cleanup_cfg isn't the first pass to happen with the
new child, these dead EH edges might cause problems.
@@ -9207,6 +9220,19 @@ lower_omp_critical (gimple_stmt_iterator *gsi_p, omp_context *ctx)
DECL_COMMON (decl) = 1;
DECL_ARTIFICIAL (decl) = 1;
DECL_IGNORED_P (decl) = 1;
+
+ /* If '#pragma omp critical' is inside target region, the symbol must
+ have an 'omp declare target' attribute. */
+ omp_context *octx;
+ for (octx = ctx->outer; octx; octx = octx->outer)
+ if (is_targetreg_ctx (octx))
+ {
+ DECL_ATTRIBUTES (decl)
+ = tree_cons (get_identifier ("omp declare target"),
+ NULL_TREE, DECL_ATTRIBUTES (decl));
+ break;
+ }
+
varpool_node::finalize_decl (decl);
splay_tree_insert (critical_name_mutexes, (splay_tree_key) name,
diff --git a/gcc/passes.c b/gcc/passes.c
index 5001c3d..d63c913 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -2308,6 +2308,8 @@ ipa_write_summaries (void)
if (!flag_generate_lto || seen_error ())
return;
+ select_what_to_dump ();
+
encoder = lto_symtab_encoder_new (false);
/* Create the callgraph set in the same order used in
--
1.7.1
1.7.1