Thomas Preud'homme
2014-10-08 01:56:51 UTC
Currently the bswap pass only look for bswap pattern by examining bitwise
OR statement and doing following def-use chains. However a rotation
(left or right) can finish a manual byteswap, as shown in the following example:
unsigned
byteswap_ending_with_rotation (unsigned in)
{
in = ((in & 0xff00ff00) >> 8) | ((in & 0x00ff00ff) << 8);
in = ((in & 0xffff0000) >> 16) | ((in & 0x0000ffff) << 16);
return in;
}
which is compiled into:
byteswap_ending_with_rotation (unsigned int in)
{
unsigned int _2;
unsigned int _3;
unsigned int _4;
unsigned int _5;
<bb 2>:
_2 = in_1(D) & 4278255360;
_3 = _2 >> 8;
_4 = in_1(D) & 16711935;
_5 = _4 << 8;
in_6 = _5 | _3;
in_7 = in_6 r>> 16;
return in_7;
}
This patch adds rotation (left and right) to the list of statement to consider for byte swap.
ChangeLog are as follows:
*** gcc/ChangeLog ***
2014-09-30 Thomas Preud'homme <***@arm.com>
PR tree-optimization/63259
* tree-ssa-math-opts.c (pass_optimize_bswap::execute): Also consider
bswap in LROTATE_EXPR and RROTATE_EXPR statements.
*** gcc/testsuite/ChangeLog ***
2014-09-30 Thomas Preud'homme <***@arm.com>
PR tree-optimization/63259
* optimize-bswapsi-1.c (swap32_e): New bswap pass test.
diff --git a/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c b/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c
index 580e6e0..d4b5740 100644
--- a/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c
+++ b/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c
@@ -64,5 +64,16 @@ swap32_d (SItype in)
| (((in >> 24) & 0xFF) << 0);
}
-/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 4 "bswap" } } */
+/* This variant comes from PR63259. It compiles to a gimple sequence that ends
+ with a rotation instead of a bitwise OR. */
+
+unsigned
+swap32_e (unsigned in)
+{
+ in = ((in & 0xff00ff00) >> 8) | ((in & 0x00ff00ff) << 8);
+ in = ((in & 0xffff0000) >> 16) | ((in & 0x0000ffff) << 16);
+ return in;
+}
+
+/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 5 "bswap" } } */
/* { dg-final { cleanup-tree-dump "bswap" } } */
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 3c6e935..2023f2e 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -2377,11 +2377,16 @@ pass_optimize_bswap::execute (function *fun)
{
gimple src_stmt, cur_stmt = gsi_stmt (gsi);
tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
+ enum tree_code code;
struct symbolic_number n;
bool bswap;
- if (!is_gimple_assign (cur_stmt)
- || gimple_assign_rhs_code (cur_stmt) != BIT_IOR_EXPR)
+ if (!is_gimple_assign (cur_stmt))
+ continue;
+
+ code = gimple_assign_rhs_code (cur_stmt);
+ if (code != BIT_IOR_EXPR && code != LROTATE_EXPR
+ && code != RROTATE_EXPR)
continue;
src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
Testing was done by running the testsuite on arm-none-eabi target with QEMU
emulating Cortex-M3: no regression were found. Due to the potential increase
in compilation time, A bootstrap with sequential build (no -j option when calling
make) and with default option was made with and without the patch. The
results shows no increase compilation time:
r215662 with patch:
make 6167.48s user 401.03s system 99% cpu 1:49:52.07 total
r215662 without patch
make 6136.63s user 400.32s system 99% cpu 1:49:27.28 total
Is it ok for trunk?
Best regards,
Thomas Preud'homme
OR statement and doing following def-use chains. However a rotation
(left or right) can finish a manual byteswap, as shown in the following example:
unsigned
byteswap_ending_with_rotation (unsigned in)
{
in = ((in & 0xff00ff00) >> 8) | ((in & 0x00ff00ff) << 8);
in = ((in & 0xffff0000) >> 16) | ((in & 0x0000ffff) << 16);
return in;
}
which is compiled into:
byteswap_ending_with_rotation (unsigned int in)
{
unsigned int _2;
unsigned int _3;
unsigned int _4;
unsigned int _5;
<bb 2>:
_2 = in_1(D) & 4278255360;
_3 = _2 >> 8;
_4 = in_1(D) & 16711935;
_5 = _4 << 8;
in_6 = _5 | _3;
in_7 = in_6 r>> 16;
return in_7;
}
This patch adds rotation (left and right) to the list of statement to consider for byte swap.
ChangeLog are as follows:
*** gcc/ChangeLog ***
2014-09-30 Thomas Preud'homme <***@arm.com>
PR tree-optimization/63259
* tree-ssa-math-opts.c (pass_optimize_bswap::execute): Also consider
bswap in LROTATE_EXPR and RROTATE_EXPR statements.
*** gcc/testsuite/ChangeLog ***
2014-09-30 Thomas Preud'homme <***@arm.com>
PR tree-optimization/63259
* optimize-bswapsi-1.c (swap32_e): New bswap pass test.
diff --git a/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c b/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c
index 580e6e0..d4b5740 100644
--- a/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c
+++ b/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c
@@ -64,5 +64,16 @@ swap32_d (SItype in)
| (((in >> 24) & 0xFF) << 0);
}
-/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 4 "bswap" } } */
+/* This variant comes from PR63259. It compiles to a gimple sequence that ends
+ with a rotation instead of a bitwise OR. */
+
+unsigned
+swap32_e (unsigned in)
+{
+ in = ((in & 0xff00ff00) >> 8) | ((in & 0x00ff00ff) << 8);
+ in = ((in & 0xffff0000) >> 16) | ((in & 0x0000ffff) << 16);
+ return in;
+}
+
+/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 5 "bswap" } } */
/* { dg-final { cleanup-tree-dump "bswap" } } */
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 3c6e935..2023f2e 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -2377,11 +2377,16 @@ pass_optimize_bswap::execute (function *fun)
{
gimple src_stmt, cur_stmt = gsi_stmt (gsi);
tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
+ enum tree_code code;
struct symbolic_number n;
bool bswap;
- if (!is_gimple_assign (cur_stmt)
- || gimple_assign_rhs_code (cur_stmt) != BIT_IOR_EXPR)
+ if (!is_gimple_assign (cur_stmt))
+ continue;
+
+ code = gimple_assign_rhs_code (cur_stmt);
+ if (code != BIT_IOR_EXPR && code != LROTATE_EXPR
+ && code != RROTATE_EXPR)
continue;
src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
Testing was done by running the testsuite on arm-none-eabi target with QEMU
emulating Cortex-M3: no regression were found. Due to the potential increase
in compilation time, A bootstrap with sequential build (no -j option when calling
make) and with default option was made with and without the patch. The
results shows no increase compilation time:
r215662 with patch:
make 6167.48s user 401.03s system 99% cpu 1:49:52.07 total
r215662 without patch
make 6136.63s user 400.32s system 99% cpu 1:49:27.28 total
Is it ok for trunk?
Best regards,
Thomas Preud'homme