From 02dfcdc2e8e65d23a099d9f92ff6a1d2f98a312e Mon Sep 17 00:00:00 2001 From: Matthew Pickering <matthewtpickering@gmail.com> Date: Mon, 7 Aug 2023 09:56:01 +0100 Subject: [PATCH] Revert "Aarch64 NCG: Use encoded immediates for literals." This reverts commit 40425c5021a9d8eb5e1c1046e2d5fa0a2918f96c. See #23793 ------------------------- Metric Increase: T4801 T5321FD T5321Fun ------------------------- --- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs | 173 +++-------------------- compiler/GHC/CmmToAsm/AArch64/Instr.hs | 17 +-- compiler/GHC/CmmToAsm/AArch64/Ppr.hs | 1 - compiler/GHC/CmmToAsm/AArch64/Regs.hs | 9 -- 4 files changed, 26 insertions(+), 174 deletions(-) diff --git a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs index 052d649d7615..b77aa73e52bf 100644 --- a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs @@ -372,97 +372,6 @@ getSomeReg expr = do Fixed rep reg code -> return (reg, rep, code) -{- Note [Aarch64 immediates] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Aarch64 with it's fixed width instruction encoding uses leftover space for -immediates. -If you want the full rundown consult the arch reference document: -"Arm® Architecture Reference Manual" - "C3.4 Data processing - immediate" - -The gist of it is that different instructions allow for different immediate encodings. -The ones we care about for better code generation are: - -* Simple but potentially repeated bit-patterns for logic instructions. -* 16bit numbers shifted by multiples of 16. -* 12 bit numbers optionally shifted by 12 bits. - -It might seem like the ISA allows for 64bit immediates but this isn't the case. -Rather there are some instruction aliases which allow for large unencoded immediates -which will then be transalted to one of the immediate encodings implicitly. - -For example mov x1, #0x10000 is allowed but will be assembled to movz x1, #0x1, lsl #16 --} - --- | Move (wide immediate) --- Allows for 16bit immediate which can be shifted by 0/16/32/48 bits. --- Used with MOVZ,MOVN, MOVK --- See Note [Aarch64 immediates] -getMovWideImm :: Integer -> Width -> Maybe Operand -getMovWideImm n w - -- TODO: Handle sign extension/negatives - | n <= 0 - = Nothing - -- Fits in 16 bits - | sized_n < 2^(16 :: Int) - = Just $ OpImm (ImmInteger truncated) - - -- 0x0000 0000 xxxx 0000 - | trailing_zeros >= 16 && sized_n < 2^(32 :: Int) - = Just $ OpImmShift (ImmInteger $ truncated `shiftR` 16) SLSL 16 - - -- 0x 0000 xxxx 0000 0000 - | trailing_zeros >= 32 && sized_n < 2^(48 :: Int) - = Just $ OpImmShift (ImmInteger $ truncated `shiftR` 32) SLSL 32 - - -- 0x xxxx 0000 0000 0000 - | trailing_zeros >= 48 - = Just $ OpImmShift (ImmInteger $ truncated `shiftR` 48) SLSL 48 - - | otherwise - = Nothing - where - truncated = narrowU w n - sized_n = fromIntegral truncated :: Word64 - trailing_zeros = countTrailingZeros sized_n - --- | Arithmetic(immediate) --- Allows for 12bit immediates which can be shifted by 0 or 12 bits. --- Used with ADD, ADDS, SUB, SUBS, CMP, CMN --- See Note [Aarch64 immediates] -getArithImm :: Integer -> Width -> Maybe Operand -getArithImm n w - -- TODO: Handle sign extension - | n <= 0 - = Nothing - -- Fits in 16 bits - -- Fits in 12 bits - | sized_n < 2^(12::Int) - = Just $ OpImm (ImmInteger truncated) - - -- 12 bits shifted by 12 places. - | trailing_zeros >= 12 && sized_n < 2^(24::Int) - = Just $ OpImmShift (ImmInteger $ truncated `shiftR` 12) SLSL 12 - - | otherwise - = Nothing - where - sized_n = fromIntegral truncated :: Word64 - truncated = narrowU w n - trailing_zeros = countTrailingZeros sized_n - --- | Logical (immediate) --- Allows encoding of some repeated bitpatterns --- Used with AND, ANDS, EOR, ORR, TST --- and their aliases which includes at least MOV (bitmask immediate) --- See Note [Aarch64 immediates] -getBitmaskImm :: Integer -> Width -> Maybe Operand -getBitmaskImm n w - | isAArch64Bitmask truncated = Just $ OpImm (ImmInteger truncated) - | otherwise = Nothing - where - truncated = narrowU w n - - -- TODO OPT: we might be able give getRegister -- a hint, what kind of register we want. getFloatReg :: HasCallStack => CmmExpr -> NatM (Reg, Format, InstrBlock) @@ -585,14 +494,8 @@ getRegister' config plat expr CmmLit lit -> case lit of - -- Use wzr xzr for CmmInt 0 if the width matches up, otherwise do a move. - -- TODO: Reenable after https://gitlab.haskell.org/ghc/ghc/-/issues/23632 is fixed. - -- CmmInt 0 W32 -> do - -- let format = intFormat W32 - -- return (Fixed format reg_zero (unitOL $ (COMMENT ((text . show $ expr))) )) - -- CmmInt 0 W64 -> do - -- let format = intFormat W64 - -- return (Fixed format reg_zero (unitOL $ (COMMENT ((text . show $ expr))) )) + -- TODO handle CmmInt 0 specially, use wzr or xzr. + CmmInt i W8 | i >= 0 -> do return (Any (intFormat W8) (\dst -> unitOL $ annExpr expr (MOV (OpReg W8 dst) (OpImm (ImmInteger (narrowU W8 i)))))) CmmInt i W16 | i >= 0 -> do @@ -607,13 +510,8 @@ getRegister' config plat expr -- Those need the upper bits set. We'd either have to explicitly sign -- or figure out something smarter. Lowered to -- `MOV dst XZR` - CmmInt i w | i >= 0 - , Just imm_op <- getMovWideImm i w -> do - return (Any (intFormat w) (\dst -> unitOL $ annExpr expr (MOVZ (OpReg w dst) imm_op))) - CmmInt i w | isNbitEncodeable 16 i, i >= 0 -> do return (Any (intFormat w) (\dst -> unitOL $ annExpr expr (MOV (OpReg W16 dst) (OpImm (ImmInteger i))))) - CmmInt i w | isNbitEncodeable 32 i, i >= 0 -> do let half0 = fromIntegral (fromIntegral i :: Word16) half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16) @@ -688,6 +586,7 @@ getRegister' config plat expr (op, imm_code) <- litToImm' lit let rep = cmmLitType plat lit format = cmmTypeFormat rep + -- width = typeWidth rep return (Any format (\dst -> imm_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op)) CmmLabelOff lbl off -> do @@ -892,51 +791,17 @@ getRegister' config plat expr -- withTempFloatReg w op = OpReg w <$> getNewRegNat (floatFormat w) >>= op -- A "plain" operation. - bitOpImm w op encode_imm = do + bitOp w op = do -- compute x<m> <- x -- compute x<o> <- y -- <OP> x<n>, x<m>, x<o> (reg_x, format_x, code_x) <- getSomeReg x - (op_y, format_y, code_y) <- case y of - CmmLit (CmmInt n w) - | Just imm_operand_y <- encode_imm n w - -> return (imm_operand_y, intFormat w, nilOL) - _ -> do - (reg_y, format_y, code_y) <- getSomeReg y - return (OpReg w reg_y, format_y, code_y) - massertPpr (isIntFormat format_x == isIntFormat format_y) $ text "bitOpImm: incompatible" + (reg_y, format_y, code_y) <- getSomeReg y + massertPpr (isIntFormat format_x == isIntFormat format_y) $ text "bitOp: incompatible" return $ Any (intFormat w) (\dst -> code_x `appOL` code_y `appOL` - op (OpReg w dst) (OpReg w reg_x) op_y) - - -- A (potentially signed) integer operation. - -- In the case of 8- and 16-bit signed arithmetic we must first - -- sign-extend both arguments to 32-bits. - -- See Note [Signed arithmetic on AArch64]. - intOpImm :: Bool -> Width -> (Operand -> Operand -> Operand -> OrdList Instr) -> (Integer -> Width -> Maybe Operand) -> NatM (Register) - intOpImm {- is signed -} True w op _encode_imm = intOp True w op - intOpImm False w op encode_imm = do - -- compute x<m> <- x - -- compute x<o> <- y - -- <OP> x<n>, x<m>, x<o> - (reg_x, format_x, code_x) <- getSomeReg x - (op_y, format_y, code_y) <- case y of - CmmLit (CmmInt n w) - | Just imm_operand_y <- encode_imm n w - -> return (imm_operand_y, intFormat w, nilOL) - _ -> do - (reg_y, format_y, code_y) <- getSomeReg y - return (OpReg w reg_y, format_y, code_y) - massertPpr (isIntFormat format_x && isIntFormat format_y) $ text "intOp: non-int" - -- This is the width of the registers on which the operation - -- should be performed. - let w' = opRegWidth w - return $ Any (intFormat w) $ \dst -> - code_x `appOL` - code_y `appOL` - op (OpReg w' dst) (OpReg w' reg_x) (op_y) `appOL` - truncateReg w' w dst -- truncate back to the operand's original width + op (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)) -- A (potentially signed) integer operation. -- In the case of 8- and 16-bit signed arithmetic we must first @@ -982,9 +847,9 @@ getRegister' config plat expr case op of -- Integer operations -- Add/Sub should only be Integer Options. - MO_Add w -> intOpImm False w (\d x y -> unitOL $ annExpr expr (ADD d x y)) getArithImm + MO_Add w -> intOp False w (\d x y -> unitOL $ annExpr expr (ADD d x y)) -- TODO: Handle sub-word case - MO_Sub w -> intOpImm False w (\d x y -> unitOL $ annExpr expr (SUB d x y)) getArithImm + MO_Sub w -> intOp False w (\d x y -> unitOL $ annExpr expr (SUB d x y)) -- Note [CSET] -- ~~~~~~~~~~~ @@ -1026,8 +891,8 @@ getRegister' config plat expr -- N.B. We needn't sign-extend sub-word size (in)equality comparisons -- since we don't care about ordering. - MO_Eq w -> bitOpImm w (\d x y -> toOL [ CMP x y, CSET d EQ ]) getArithImm - MO_Ne w -> bitOpImm w (\d x y -> toOL [ CMP x y, CSET d NE ]) getArithImm + MO_Eq w -> bitOp w (\d x y -> toOL [ CMP x y, CSET d EQ ]) + MO_Ne w -> bitOp w (\d x y -> toOL [ CMP x y, CSET d NE ]) -- Signed multiply/divide MO_Mul w -> intOp True w (\d x y -> unitOL $ MUL d x y) @@ -1056,10 +921,10 @@ getRegister' config plat expr MO_S_Lt w -> intOp True w (\d x y -> toOL [ CMP x y, CSET d SLT ]) -- Unsigned comparisons - MO_U_Ge w -> intOpImm False w (\d x y -> toOL [ CMP x y, CSET d UGE ]) getArithImm - MO_U_Le w -> intOpImm False w (\d x y -> toOL [ CMP x y, CSET d ULE ]) getArithImm - MO_U_Gt w -> intOpImm False w (\d x y -> toOL [ CMP x y, CSET d UGT ]) getArithImm - MO_U_Lt w -> intOpImm False w (\d x y -> toOL [ CMP x y, CSET d ULT ]) getArithImm + MO_U_Ge w -> intOp False w (\d x y -> toOL [ CMP x y, CSET d UGE ]) + MO_U_Le w -> intOp False w (\d x y -> toOL [ CMP x y, CSET d ULE ]) + MO_U_Gt w -> intOp False w (\d x y -> toOL [ CMP x y, CSET d UGT ]) + MO_U_Lt w -> intOp False w (\d x y -> toOL [ CMP x y, CSET d ULT ]) -- Floating point arithmetic MO_F_Add w -> floatOp w (\d x y -> unitOL $ ADD d x y) @@ -1082,9 +947,9 @@ getRegister' config plat expr MO_F_Lt w -> floatCond w (\d x y -> toOL [ CMP x y, CSET d OLT ]) -- x < y <=> y >= x -- Bitwise operations - MO_And w -> bitOpImm w (\d x y -> unitOL $ AND d x y) getBitmaskImm - MO_Or w -> bitOpImm w (\d x y -> unitOL $ ORR d x y) getBitmaskImm - MO_Xor w -> bitOpImm w (\d x y -> unitOL $ EOR d x y) getBitmaskImm + MO_And w -> bitOp w (\d x y -> unitOL $ AND d x y) + MO_Or w -> bitOp w (\d x y -> unitOL $ ORR d x y) + MO_Xor w -> bitOp w (\d x y -> unitOL $ EOR d x y) MO_Shl w -> intOp False w (\d x y -> unitOL $ LSL d x y) MO_U_Shr w -> intOp False w (\d x y -> unitOL $ LSR d x y) MO_S_Shr w -> intOp True w (\d x y -> unitOL $ ASR d x y) @@ -1134,7 +999,7 @@ getRegister' config plat expr where isNbitEncodeable :: Int -> Integer -> Bool - isNbitEncodeable n_bits i = let shift = n_bits - 1 in (-1 `shiftL` shift) <= i && i < (1 `shiftL` shift) + isNbitEncodeable n i = let shift = n - 1 in (-1 `shiftL` shift) <= i && i < (1 `shiftL` shift) -- N.B. MUL does not set the overflow flag. -- These implementations are based on output from GCC 11. diff --git a/compiler/GHC/CmmToAsm/AArch64/Instr.hs b/compiler/GHC/CmmToAsm/AArch64/Instr.hs index 2f4a5fe38e57..d8dd1a4dc0c7 100644 --- a/compiler/GHC/CmmToAsm/AArch64/Instr.hs +++ b/compiler/GHC/CmmToAsm/AArch64/Instr.hs @@ -110,7 +110,6 @@ regUsageOfInstr platform instr = case instr of LSR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) MOV dst src -> usage (regOp src, regOp dst) MOVK dst src -> usage (regOp src, regOp dst) - MOVZ dst src -> usage (regOp src, regOp dst) MVN dst src -> usage (regOp src, regOp dst) ORR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) ROR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) @@ -252,7 +251,6 @@ patchRegsOfInstr instr env = case instr of LSR o1 o2 o3 -> LSR (patchOp o1) (patchOp o2) (patchOp o3) MOV o1 o2 -> MOV (patchOp o1) (patchOp o2) MOVK o1 o2 -> MOVK (patchOp o1) (patchOp o2) - MOVZ o1 o2 -> MOVZ (patchOp o1) (patchOp o2) MVN o1 o2 -> MVN (patchOp o1) (patchOp o2) ORR o1 o2 o3 -> ORR (patchOp o1) (patchOp o2) (patchOp o3) ROR o1 o2 o3 -> ROR (patchOp o1) (patchOp o2) (patchOp o3) @@ -383,8 +381,9 @@ mkSpillInstr config reg delta slot = where a .&~. b = a .&. (complement b) - fmt = fmtOfRealReg (case reg of { RegReal r -> r; _ -> panic "Expected real reg"}) - + fmt = case reg of + RegReal (RealRegSingle n) | n < 32 -> II64 + _ -> FF64 mkIp0SpillAddr imm = ANN (text "Spill: IP0 <- SP + " <> int imm) $ ADD ip0 sp (OpImm (ImmInt imm)) mkStrSp imm = ANN (text "Spill@" <> int (off - delta)) $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 31) (ImmInt imm))) mkStrIp0 imm = ANN (text "Spill@" <> int (off - delta)) $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 16) (ImmInt imm))) @@ -409,7 +408,9 @@ mkLoadInstr config reg delta slot = where a .&~. b = a .&. (complement b) - fmt = fmtOfRealReg (case reg of { RegReal r -> r; _ -> panic "Expected real reg"}) + fmt = case reg of + RegReal (RealRegSingle n) | n < 32 -> II64 + _ -> FF64 mkIp0SpillAddr imm = ANN (text "Reload: IP0 <- SP + " <> int imm) $ ADD ip0 sp (OpImm (ImmInt imm)) mkLdrSp imm = ANN (text "Reload@" <> int (off - delta)) $ LDR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 31) (ImmInt imm))) @@ -618,7 +619,7 @@ data Instr | MOV Operand Operand -- rd = rn or rd = #i | MOVK Operand Operand -- | MOVN Operand Operand - | MOVZ Operand Operand + -- | MOVZ Operand Operand | MVN Operand Operand -- rd = ~rn | ORN Operand Operand Operand -- rd = rn | ~op2 | ORR Operand Operand Operand -- rd = rn | op2 @@ -707,7 +708,6 @@ instrCon i = LSR{} -> "LSR" MOV{} -> "MOV" MOVK{} -> "MOVK" - MOVZ{} -> "MOVZ" MVN{} -> "MVN" ORN{} -> "ORN" ORR{} -> "ORR" @@ -782,9 +782,6 @@ wzr = OpReg W32 (RegReal (RealRegSingle (-1))) sp = OpReg W64 (RegReal (RealRegSingle 31)) ip0 = OpReg W64 (RegReal (RealRegSingle 16)) -reg_zero :: Reg -reg_zero = RegReal (RealRegSingle (-1)) - _x :: Int -> Operand _x i = OpReg W64 (RegReal (RealRegSingle i)) x0, x1, x2, x3, x4, x5, x6, x7 :: Operand diff --git a/compiler/GHC/CmmToAsm/AArch64/Ppr.hs b/compiler/GHC/CmmToAsm/AArch64/Ppr.hs index fa9f7df397d8..e8a2ce57fab3 100644 --- a/compiler/GHC/CmmToAsm/AArch64/Ppr.hs +++ b/compiler/GHC/CmmToAsm/AArch64/Ppr.hs @@ -417,7 +417,6 @@ pprInstr platform instr = case instr of | isFloatOp o1 || isFloatOp o2 -> op2 (text "\tfmov") o1 o2 | otherwise -> op2 (text "\tmov") o1 o2 MOVK o1 o2 -> op2 (text "\tmovk") o1 o2 - MOVZ o1 o2 -> op2 (text "\tmovz") o1 o2 MVN o1 o2 -> op2 (text "\tmvn") o1 o2 ORN o1 o2 o3 -> op3 (text "\torn") o1 o2 o3 ORR o1 o2 o3 -> op3 (text "\torr") o1 o2 o3 diff --git a/compiler/GHC/CmmToAsm/AArch64/Regs.hs b/compiler/GHC/CmmToAsm/AArch64/Regs.hs index 43544ce31515..4895d2b092a0 100644 --- a/compiler/GHC/CmmToAsm/AArch64/Regs.hs +++ b/compiler/GHC/CmmToAsm/AArch64/Regs.hs @@ -77,8 +77,6 @@ litToImm (CmmInt i w) = ImmInteger (narrowS w i) -- narrow to the width: a CmmInt might be out of -- range, but we assume that ImmInteger only contains -- in-range values. A signed value should be fine here. - -- AK: We do call this with out of range values, however - -- it just truncates as we would expect. litToImm (CmmFloat f W32) = ImmFloat f litToImm (CmmFloat f W64) = ImmDouble f litToImm (CmmLabel l) = ImmCLbl l @@ -149,13 +147,6 @@ classOfRealReg (RealRegSingle i) | i < 32 = RcInteger | otherwise = RcDouble -fmtOfRealReg :: RealReg -> Format -fmtOfRealReg real_reg = - case classOfRealReg real_reg of - RcInteger -> II64 - RcDouble -> FF64 - RcFloat -> panic "No float regs on arm" - regDotColor :: RealReg -> SDoc regDotColor reg = case classOfRealReg reg of -- GitLab