Bill Schmidt
2014-09-29 22:26:14 UTC
Hi,
Up till now we have not attempted to generate code for LE usage of
vec_lvsl and vec_lvsr that is compatible with expected BE usage. The LE
code sequence corresponding to lvsl/vperm is not good, and we encourage
programmers to convert those sequences to use direct assignment and the
type system for unaligned loads. However, the issue comes up frequently
enough that it seems best to provide this sequence together with a
warning message (in a previous patch submission) to avoid confusion.
The method used in this patch is to perform a byte-reversal of the
result of the lvsl/lvsr. This is accomplished by loading the vector
char constant {0,1,...,15}, which will appear in the register from left
to right as {15,...,1,0}. A vperm instruction (which uses BE element
ordering) is applied to the result of the lvsl/lvsr using the loaded
constant as the permute control vector.
Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions. Is this ok for trunk?
Thanks,
Bill
[gcc]
2014-09-29 Bill Schmidt <***@linux.vnet.ibm.com>
* altivec.md (altivec_lvsl): New define_expand.
(altivec_lvsl_direct): Rename define_insn from altivec_lvsl.
(altivec_lvsr): New define_expand.
(altivec_lvsr_direct): Rename define_insn from altivec_lvsr.
* rs6000.c (rs6000_expand_builtin): Change to use
altivec_lvs[lr]_direct; remove commented-out code.
[gcc/testsuite]
2014-09-29 Bill Schmidt <***@linux.vnet.ibm.com>
* gcc.target/powerpc/lvsl-lvsr.c: New test.
Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md (revision 215689)
+++ gcc/config/rs6000/altivec.md (working copy)
@@ -2297,7 +2297,32 @@
"dststt %0,%1,%2"
[(set_attr "type" "vecsimple")])
-(define_insn "altivec_lvsl"
+(define_expand "altivec_lvsl"
+ [(use (match_operand:V16QI 0 "register_operand" ""))
+ (use (match_operand:V16QI 1 "memory_operand" "Z"))]
+ "TARGET_ALTIVEC"
+ "
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_altivec_lvsl_direct (operands[0], operands[1]));
+ else
+ {
+ int i;
+ rtx mask, perm[16], constv, vperm;
+ mask = gen_reg_rtx (V16QImode);
+ emit_insn (gen_altivec_lvsl_direct (mask, operands[1]));
+ for (i = 0; i < 16; ++i)
+ perm[i] = GEN_INT (i);
+ constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
+ constv = force_reg (V16QImode, constv);
+ vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv),
+ UNSPEC_VPERM);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], vperm));
+ }
+ DONE;
+}")
+
+(define_insn "altivec_lvsl_direct"
[(set (match_operand:V16QI 0 "register_operand" "=v")
(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")]
UNSPEC_LVSL))]
@@ -2305,7 +2330,32 @@
"lvsl %0,%y1"
[(set_attr "type" "vecload")])
-(define_insn "altivec_lvsr"
+(define_expand "altivec_lvsr"
+ [(use (match_operand:V16QI 0 "register_operand" ""))
+ (use (match_operand:V16QI 1 "memory_operand" "Z"))]
+ "TARGET_ALTIVEC"
+ "
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_altivec_lvsr_direct (operands[0], operands[1]));
+ else
+ {
+ int i;
+ rtx mask, perm[16], constv, vperm;
+ mask = gen_reg_rtx (V16QImode);
+ emit_insn (gen_altivec_lvsr_direct (mask, operands[1]));
+ for (i = 0; i < 16; ++i)
+ perm[i] = GEN_INT (i);
+ constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
+ constv = force_reg (V16QImode, constv);
+ vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv),
+ UNSPEC_VPERM);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], vperm));
+ }
+ DONE;
+}")
+
+(define_insn "altivec_lvsr_direct"
[(set (match_operand:V16QI 0 "register_operand" "=v")
(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")]
UNSPEC_LVSR))]
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c (revision 215689)
+++ gcc/config/rs6000/rs6000.c (working copy)
@@ -13898,8 +13898,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx s
case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
case ALTIVEC_BUILTIN_MASK_FOR_STORE:
{
- int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr
- : (int) CODE_FOR_altivec_lvsl);
+ int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
+ : (int) CODE_FOR_altivec_lvsl_direct);
enum machine_mode tmode = insn_data[icode].operand[0].mode;
enum machine_mode mode = insn_data[icode].operand[1].mode;
tree arg;
@@ -13927,7 +13927,6 @@ rs6000_expand_builtin (tree exp, rtx target, rtx s
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
target = gen_reg_rtx (tmode);
- /*pat = gen_altivec_lvsr (target, op);*/
pat = GEN_FCN (icode) (target, op);
if (!pat)
return 0;
Index: gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c (working copy)
@@ -0,0 +1,19 @@
+/* Test expected code generation for lvsl and lvsr on little endian. */
+
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-options "-O0 -Wno-deprecated" } */
+/* { dg-final { scan-assembler-times "lvsl" 1 } } */
+/* { dg-final { scan-assembler-times "lvsr" 1 } } */
+/* { dg-final { scan-assembler-times "lxvd2x" 2 } } */
+/* { dg-final { scan-assembler-times "vperm" 2 } } */
+
+
+#include <altivec.h>
+
+float f[20];
+
+void foo ()
+{
+ vector unsigned char a = vec_lvsl (4, f);
+ vector unsigned char b = vec_lvsr (8, f);
+}
Up till now we have not attempted to generate code for LE usage of
vec_lvsl and vec_lvsr that is compatible with expected BE usage. The LE
code sequence corresponding to lvsl/vperm is not good, and we encourage
programmers to convert those sequences to use direct assignment and the
type system for unaligned loads. However, the issue comes up frequently
enough that it seems best to provide this sequence together with a
warning message (in a previous patch submission) to avoid confusion.
The method used in this patch is to perform a byte-reversal of the
result of the lvsl/lvsr. This is accomplished by loading the vector
char constant {0,1,...,15}, which will appear in the register from left
to right as {15,...,1,0}. A vperm instruction (which uses BE element
ordering) is applied to the result of the lvsl/lvsr using the loaded
constant as the permute control vector.
Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions. Is this ok for trunk?
Thanks,
Bill
[gcc]
2014-09-29 Bill Schmidt <***@linux.vnet.ibm.com>
* altivec.md (altivec_lvsl): New define_expand.
(altivec_lvsl_direct): Rename define_insn from altivec_lvsl.
(altivec_lvsr): New define_expand.
(altivec_lvsr_direct): Rename define_insn from altivec_lvsr.
* rs6000.c (rs6000_expand_builtin): Change to use
altivec_lvs[lr]_direct; remove commented-out code.
[gcc/testsuite]
2014-09-29 Bill Schmidt <***@linux.vnet.ibm.com>
* gcc.target/powerpc/lvsl-lvsr.c: New test.
Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md (revision 215689)
+++ gcc/config/rs6000/altivec.md (working copy)
@@ -2297,7 +2297,32 @@
"dststt %0,%1,%2"
[(set_attr "type" "vecsimple")])
-(define_insn "altivec_lvsl"
+(define_expand "altivec_lvsl"
+ [(use (match_operand:V16QI 0 "register_operand" ""))
+ (use (match_operand:V16QI 1 "memory_operand" "Z"))]
+ "TARGET_ALTIVEC"
+ "
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_altivec_lvsl_direct (operands[0], operands[1]));
+ else
+ {
+ int i;
+ rtx mask, perm[16], constv, vperm;
+ mask = gen_reg_rtx (V16QImode);
+ emit_insn (gen_altivec_lvsl_direct (mask, operands[1]));
+ for (i = 0; i < 16; ++i)
+ perm[i] = GEN_INT (i);
+ constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
+ constv = force_reg (V16QImode, constv);
+ vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv),
+ UNSPEC_VPERM);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], vperm));
+ }
+ DONE;
+}")
+
+(define_insn "altivec_lvsl_direct"
[(set (match_operand:V16QI 0 "register_operand" "=v")
(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")]
UNSPEC_LVSL))]
@@ -2305,7 +2330,32 @@
"lvsl %0,%y1"
[(set_attr "type" "vecload")])
-(define_insn "altivec_lvsr"
+(define_expand "altivec_lvsr"
+ [(use (match_operand:V16QI 0 "register_operand" ""))
+ (use (match_operand:V16QI 1 "memory_operand" "Z"))]
+ "TARGET_ALTIVEC"
+ "
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_altivec_lvsr_direct (operands[0], operands[1]));
+ else
+ {
+ int i;
+ rtx mask, perm[16], constv, vperm;
+ mask = gen_reg_rtx (V16QImode);
+ emit_insn (gen_altivec_lvsr_direct (mask, operands[1]));
+ for (i = 0; i < 16; ++i)
+ perm[i] = GEN_INT (i);
+ constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
+ constv = force_reg (V16QImode, constv);
+ vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv),
+ UNSPEC_VPERM);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], vperm));
+ }
+ DONE;
+}")
+
+(define_insn "altivec_lvsr_direct"
[(set (match_operand:V16QI 0 "register_operand" "=v")
(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")]
UNSPEC_LVSR))]
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c (revision 215689)
+++ gcc/config/rs6000/rs6000.c (working copy)
@@ -13898,8 +13898,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx s
case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
case ALTIVEC_BUILTIN_MASK_FOR_STORE:
{
- int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr
- : (int) CODE_FOR_altivec_lvsl);
+ int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
+ : (int) CODE_FOR_altivec_lvsl_direct);
enum machine_mode tmode = insn_data[icode].operand[0].mode;
enum machine_mode mode = insn_data[icode].operand[1].mode;
tree arg;
@@ -13927,7 +13927,6 @@ rs6000_expand_builtin (tree exp, rtx target, rtx s
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
target = gen_reg_rtx (tmode);
- /*pat = gen_altivec_lvsr (target, op);*/
pat = GEN_FCN (icode) (target, op);
if (!pat)
return 0;
Index: gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c (working copy)
@@ -0,0 +1,19 @@
+/* Test expected code generation for lvsl and lvsr on little endian. */
+
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-options "-O0 -Wno-deprecated" } */
+/* { dg-final { scan-assembler-times "lvsl" 1 } } */
+/* { dg-final { scan-assembler-times "lvsr" 1 } } */
+/* { dg-final { scan-assembler-times "lxvd2x" 2 } } */
+/* { dg-final { scan-assembler-times "vperm" 2 } } */
+
+
+#include <altivec.h>
+
+float f[20];
+
+void foo ()
+{
+ vector unsigned char a = vec_lvsl (4, f);
+ vector unsigned char b = vec_lvsr (8, f);
+}