summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Peter Bergner <bergner@linux.ibm.com> 2022-05-18 02:09:29 (GMT)
committer: Peter Bergner <bergner@linux.ibm.com> 2022-05-18 02:10:27 (GMT)
commit: c6e36f05fbb081abb068958d8900ad34b303a70b (patch)
tree: fbb1fbe9de9e09c20aa02d853a0e89c27337f7d1
parent: 3d9439b1bb76c186958d5b86f0076f8b3017b8a2 (diff)
download: gcc-c6e36f05fbb081abb068958d8900ad34b303a70b.zip
          gcc-c6e36f05fbb081abb068958d8900ad34b303a70b.tar.gz
          gcc-c6e36f05fbb081abb068958d8900ad34b303a70b.tar.bz2
rs6000: Prefer assigning the MMA vector operands to altivec registers [PR105556]
When optimizing the DGEMM kernel in OpenBLAS to use MMA, the MMA code uses
all 8 accumulators, which overlap all vs0-vs31 vector registers.  Current
trunk assigns one of the normal vector inputs of one of the MMA instructions
to a vs0-vs31 register, which forces us to spill one of the accumulators to
memory, leading to poor performance.  The solution here is to replace the
"wa" constraints for the vector input operands in the MMA instruction
patterns with "v,?wa" so that we prefer using the altivec registers
vs32-vs63 over the vs0-vs31 registers.

2022-05-17  Peter Bergner  <bergner@linux.ibm.com>
	    Segher Boessenkool  <segher@kernel.crashing.org>

gcc/
	PR target/105556
	* config/rs6000/mma.md (mma_<vv>, mma_<avv>, mma_<pv>, mma_<apv>,
	mma_<vvi4i4i8>, mma_<avvi4i4i8>, mma_<vvi4i4i2>, mma_<avvi4i4i2>,
	mma_<vvi4i4>, mma_<avvi4i4>, mma_<pvi4i2>, mma_<apvi4i2>,
	mma_<vvi4i4i4>, mma_<avvi4i4i4>): Replace "wa" constraints with
	"v,?wa".  Update other operands accordingly.
-rw-r--r-- gcc/config/rs6000/mma.md 150
1 file changed, 75 insertions, 75 deletions
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 907c9d6..a183b6a 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -490,50 +490,50 @@
[(set_attr "type" "mma")])
(define_insn "mma_<vv>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
- (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+ [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_VV))]
"TARGET_MMA"
"<vv> %A0,%x1,%x2"
[(set_attr "type" "mma")])
(define_insn "mma_<avv>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
- (match_operand:V16QI 2 "vsx_register_operand" "wa")
- (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+ [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+ (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_AVV))]
"TARGET_MMA"
"<avv> %A0,%x2,%x3"
[(set_attr "type" "mma")])
(define_insn "mma_<pv>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
- (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
- (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+ [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+ (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_PV))]
"TARGET_MMA"
"<pv> %A0,%x1,%x2"
[(set_attr "type" "mma")])
(define_insn "mma_<apv>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
- (match_operand:OO 2 "vsx_register_operand" "wa")
- (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+ [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+ (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+ (match_operand:OO 2 "vsx_register_operand" "v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_APV))]
"TARGET_MMA"
"<apv> %A0,%x2,%x3"
[(set_attr "type" "mma")])
(define_insn "mma_<vvi4i4i8>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
- (match_operand:V16QI 2 "vsx_register_operand" "wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n")
- (match_operand:SI 4 "const_0_to_15_operand" "n")
- (match_operand:SI 5 "u8bit_cint_operand" "n")]
+ [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n")
+ (match_operand:SI 5 "u8bit_cint_operand" "n,n")]
MMA_VVI4I4I8))]
"TARGET_MMA"
"<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
@@ -541,13 +541,13 @@
(set_attr "prefixed" "yes")])
(define_insn "mma_<avvi4i4i8>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
- (match_operand:V16QI 2 "vsx_register_operand" "wa")
- (match_operand:V16QI 3 "vsx_register_operand" "wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n")
- (match_operand:SI 5 "const_0_to_15_operand" "n")
- (match_operand:SI 6 "u8bit_cint_operand" "n")]
+ [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+ (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n")
+ (match_operand:SI 6 "u8bit_cint_operand" "n,n")]
MMA_AVVI4I4I8))]
"TARGET_MMA"
"<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
@@ -555,12 +555,12 @@
(set_attr "prefixed" "yes")])
(define_insn "mma_<vvi4i4i2>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
- (match_operand:V16QI 2 "vsx_register_operand" "wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n")
- (match_operand:SI 4 "const_0_to_15_operand" "n")
- (match_operand:SI 5 "const_0_to_3_operand" "n")]
+ [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n")
+ (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
MMA_VVI4I4I2))]
"TARGET_MMA"
"<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
@@ -568,13 +568,13 @@
(set_attr "prefixed" "yes")])
(define_insn "mma_<avvi4i4i2>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
- (match_operand:V16QI 2 "vsx_register_operand" "wa")
- (match_operand:V16QI 3 "vsx_register_operand" "wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n")
- (match_operand:SI 5 "const_0_to_15_operand" "n")
- (match_operand:SI 6 "const_0_to_3_operand" "n")]
+ [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+ (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n")
+ (match_operand:SI 6 "const_0_to_3_operand" "n,n")]
MMA_AVVI4I4I2))]
"TARGET_MMA"
"<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
@@ -582,11 +582,11 @@
(set_attr "prefixed" "yes")])
(define_insn "mma_<vvi4i4>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
- (match_operand:V16QI 2 "vsx_register_operand" "wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n")
- (match_operand:SI 4 "const_0_to_15_operand" "n")]
+ [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n")]
MMA_VVI4I4))]
"TARGET_MMA"
"<vvi4i4> %A0,%x1,%x2,%3,%4"
@@ -594,12 +594,12 @@
(set_attr "prefixed" "yes")])
(define_insn "mma_<avvi4i4>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
- (match_operand:V16QI 2 "vsx_register_operand" "wa")
- (match_operand:V16QI 3 "vsx_register_operand" "wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n")
- (match_operand:SI 5 "const_0_to_15_operand" "n")]
+ [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+ (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
MMA_AVVI4I4))]
"TARGET_MMA"
"<avvi4i4> %A0,%x2,%x3,%4,%5"
@@ -607,11 +607,11 @@
(set_attr "prefixed" "yes")])
(define_insn "mma_<pvi4i2>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
- (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
- (match_operand:V16QI 2 "vsx_register_operand" "wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n")
- (match_operand:SI 4 "const_0_to_3_operand" "n")]
+ [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+ (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n")
+ (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
MMA_PVI4I2))]
"TARGET_MMA"
"<pvi4i2> %A0,%x1,%x2,%3,%4"
@@ -619,12 +619,12 @@
(set_attr "prefixed" "yes")])
(define_insn "mma_<apvi4i2>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
- (match_operand:OO 2 "vsx_register_operand" "wa")
- (match_operand:V16QI 3 "vsx_register_operand" "wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n")
- (match_operand:SI 5 "const_0_to_3_operand" "n")]
+ [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+ (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+ (match_operand:OO 2 "vsx_register_operand" "v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n")
+ (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
MMA_APVI4I2))]
"TARGET_MMA"
"<apvi4i2> %A0,%x2,%x3,%4,%5"
@@ -632,12 +632,12 @@
(set_attr "prefixed" "yes")])
(define_insn "mma_<vvi4i4i4>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
- (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
- (match_operand:V16QI 2 "vsx_register_operand" "wa")
- (match_operand:SI 3 "const_0_to_15_operand" "n")
- (match_operand:SI 4 "const_0_to_15_operand" "n")
- (match_operand:SI 5 "const_0_to_15_operand" "n")]
+ [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+ (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
MMA_VVI4I4I4))]
"TARGET_MMA"
"<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
@@ -645,13 +645,13 @@
(set_attr "prefixed" "yes")])
(define_insn "mma_<avvi4i4i4>"
- [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
- (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
- (match_operand:V16QI 2 "vsx_register_operand" "wa")
- (match_operand:V16QI 3 "vsx_register_operand" "wa")
- (match_operand:SI 4 "const_0_to_15_operand" "n")
- (match_operand:SI 5 "const_0_to_15_operand" "n")
- (match_operand:SI 6 "const_0_to_15_operand" "n")]
+ [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+ (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+ (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+ (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
+ (match_operand:SI 4 "const_0_to_15_operand" "n,n")
+ (match_operand:SI 5 "const_0_to_15_operand" "n,n")
+ (match_operand:SI 6 "const_0_to_15_operand" "n,n")]
MMA_AVVI4I4I4))]
"TARGET_MMA"
"<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"