diff --git a/libraries/base/src/Data/Semigroup/Internal.hs b/libraries/base/src/Data/Semigroup/Internal.hs
index 7da13f0443e95935578daff7820ceab0d237a648..d5443a01565fd4fa0196c557a251ffa2571cd0f8 100644
--- a/libraries/base/src/Data/Semigroup/Internal.hs
+++ b/libraries/base/src/Data/Semigroup/Internal.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE BangPatterns #-}
 {-# LANGUAGE DeriveGeneric #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# LANGUAGE PolyKinds #-}
@@ -134,8 +135,46 @@ newtype Endo a = Endo { appEndo :: a -> a }
 
 -- | @since 4.9.0.0
 instance Semigroup (Endo a) where
-        (<>) = coerce ((.) :: (a -> a) -> (a -> a) -> (a -> a))
-        stimes = stimesMonoid
+    (<>) = coerce ((.) :: (a -> a) -> (a -> a) -> (a -> a))
+
+    -- See Note [stimes Endo]
+    stimes !n0 (Endo e) = Endo (\a0 ->
+      -- We check separately for 0 and 1 per
+      -- https://github.com/haskell/core-libraries-committee/issues/4#issuecomment-955605592
+      -- We are explicitly strict in the number so strictness is calculated
+      -- correctly even without specialization.
+      case n0 of
+        _ | n0 < 0 -> stimesEndoError
+        0 -> a0
+        1 -> e a0
+        _ -> go n0 a0)
+      where
+        go !0 a = a
+        go n a = e (go (n - 1) a)
+
+{-# NOINLINE stimesEndoError #-}
+-- There's no reason to put this gunk in the unfolding.
+stimesEndoError :: a
+stimesEndoError = errorWithoutStackTrace "stimes (for Endo): negative multiplier"
+
+-- Note [stimes Endo]
+-- ~~~~~~~~~~~~~~~~~~
+--
+-- We used to use
+--
+--   stimes = stimesMonoid
+--
+-- But this is pretty bad! The function it produces is represented in memory as
+-- a balanced tree of compositions. To actually *apply* that function, it's
+-- necessary to walk the tree. It's much better to just construct a function
+-- that counts out applications.
+--
+-- Why do we break open the `Endo` construction rather than just using `mempty`
+-- and `<>`? We want GHC to infer that `stimes` has an arity of 3. Currently,
+-- it does so by default, but there has been some talk in the past of turning
+-- on -fpedantic-bottoms, which would drop the arity to 2. Indeed, if we were
+-- really careless, we could theoretically get GHC to build a *list* of
+-- compositions, which would be awful.
 
 -- | @since 2.01
 instance Monoid (Endo a) where
diff --git a/libraries/base/tests/all.T b/libraries/base/tests/all.T
index 3b92c9eb48153edfd1dae97891c37ce4d1a6fe79..d10182cb484e07f748365da1d8d3aea045eb3719 100644
--- a/libraries/base/tests/all.T
+++ b/libraries/base/tests/all.T
@@ -317,3 +317,4 @@ test('T23697',
   [ when(opsys('mingw32'), skip) # header not found
   , when(opsys('darwin'), skip)  # permission denied
   ], makefile_test, ['T23697'])
+test('stimesEndo', normal, compile_and_run, [''])
diff --git a/libraries/base/tests/stimesEndo.hs b/libraries/base/tests/stimesEndo.hs
new file mode 100644
index 0000000000000000000000000000000000000000..3f94e2f0a19e8dfc661466ce72cdc48dbf21ef74
--- /dev/null
+++ b/libraries/base/tests/stimesEndo.hs
@@ -0,0 +1,9 @@
+module Main where
+
+import Data.Semigroup
+
+adder :: Int -> Endo Int
+adder n = stimes n (Endo (+ 1))
+
+main :: IO ()
+main = print $ map (\n -> appEndo (adder n) 0) [0 .. 5]
diff --git a/libraries/base/tests/stimesEndo.stdout b/libraries/base/tests/stimesEndo.stdout
new file mode 100644
index 0000000000000000000000000000000000000000..1b69d798193599abc68cdb8f21370465a41165a7
--- /dev/null
+++ b/libraries/base/tests/stimesEndo.stdout
@@ -0,0 +1 @@
+[0,1,2,3,4,5]