summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSylvain Henry <sylvain@haskus.fr>2019-08-06 23:57:21 (GMT)
committerMarge Bot <ben+marge-bot@smart-cactus.org>2019-08-16 02:13:52 (GMT)
commit142088536c4e4cd71bc3e4569f58266b91ef0b98 (patch)
tree0485e94bb1816b142da622887b0fccbb36175d65
parent8d076841cd5a22a43c9f19113a6fbed49de156f8 (diff)
downloadghc-142088536c4e4cd71bc3e4569f58266b91ef0b98.zip
ghc-142088536c4e4cd71bc3e4569f58266b91ef0b98.tar.gz
ghc-142088536c4e4cd71bc3e4569f58266b91ef0b98.tar.bz2
Cmm: constant folding `quotRem x 2^N`
`quot` and `rem` are implemented efficiently when the second argument is a constant power of 2. This patch uses the same implementations for `quotRem` primop.
-rw-r--r--compiler/codeGen/StgCmmPrim.hs50
1 files changed, 39 insertions, 11 deletions
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index c3f9d5a..61d88fe 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -46,6 +46,7 @@ import SMRep
import FastString
import Outputable
import Util
+import Data.Maybe
import Data.Bits ((.&.), bit)
import Control.Monad (liftM, when, unless)
@@ -872,43 +873,65 @@ emitPrimOp dflags r@[res] op args
emit stmt
emitPrimOp dflags results op args
- = case callishPrimOpSupported dflags op of
+ = case callishPrimOpSupported dflags op args of
Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
Right gen -> gen results args
+-- Note [QuotRem optimization]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- `quot` and `rem` with constant divisor can be implemented with fast bit-ops
+-- (shift, .&.).
+--
+-- Currently we only support optimization (performed in CmmOpt) when the
+-- constant is a power of 2. #9041 tracks the implementation of the general
+-- optimization.
+--
+-- `quotRem` can be optimized in the same way. However as it returns two values,
+-- it is implemented as a "callish" primop which is harder to match and
+-- to transform later on. For simplicity, the current implementation detects cases
+-- that can be optimized (see `quotRemCanBeOptimized`) and converts STG quotRem
+-- primop into two CMM quot and rem primops.
+
type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
-callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
-callishPrimOpSupported dflags op
+callishPrimOpSupported :: DynFlags -> PrimOp -> [CmmExpr] -> Either CallishMachOp GenericOp
+callishPrimOpSupported dflags op args
= case op of
- IntQuotRemOp | ncg && (x86ish || ppc) ->
- Left (MO_S_QuotRem (wordWidth dflags))
- | otherwise ->
- Right (genericIntQuotRemOp (wordWidth dflags))
+ IntQuotRemOp | ncg && (x86ish || ppc)
+ , not quotRemCanBeOptimized
+ -> Left (MO_S_QuotRem (wordWidth dflags))
+ | otherwise
+ -> Right (genericIntQuotRemOp (wordWidth dflags))
Int8QuotRemOp | ncg && (x86ish || ppc)
+ , not quotRemCanBeOptimized
-> Left (MO_S_QuotRem W8)
| otherwise -> Right (genericIntQuotRemOp W8)
Int16QuotRemOp | ncg && (x86ish || ppc)
+ , not quotRemCanBeOptimized
-> Left (MO_S_QuotRem W16)
| otherwise -> Right (genericIntQuotRemOp W16)
- WordQuotRemOp | ncg && (x86ish || ppc) ->
- Left (MO_U_QuotRem (wordWidth dflags))
- | otherwise ->
- Right (genericWordQuotRemOp (wordWidth dflags))
+ WordQuotRemOp | ncg && (x86ish || ppc)
+ , not quotRemCanBeOptimized
+ -> Left (MO_U_QuotRem (wordWidth dflags))
+ | otherwise
+ -> Right (genericWordQuotRemOp (wordWidth dflags))
WordQuotRem2Op | (ncg && (x86ish || ppc))
|| llvm -> Left (MO_U_QuotRem2 (wordWidth dflags))
| otherwise -> Right (genericWordQuotRem2Op dflags)
Word8QuotRemOp | ncg && (x86ish || ppc)
+ , not quotRemCanBeOptimized
-> Left (MO_U_QuotRem W8)
| otherwise -> Right (genericWordQuotRemOp W8)
Word16QuotRemOp| ncg && (x86ish || ppc)
+ , not quotRemCanBeOptimized
-> Left (MO_U_QuotRem W16)
| otherwise -> Right (genericWordQuotRemOp W16)
@@ -944,6 +967,11 @@ callishPrimOpSupported dflags op
_ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
where
+ -- See Note [QuotRem optimization]
+ quotRemCanBeOptimized = case args of
+ [_, CmmLit (CmmInt n _) ] -> isJust (exactLog2 n)
+ _ -> False
+
ncg = case hscTarget dflags of
HscAsm -> True
_ -> False