diff --git a/.mailmap b/.mailmap
index 831f8d2392375c5473ff2d7cf42cfc0859f029e0..6106feede37f8add8e685c0b8356abd2091d9f8e 100644
--- a/.mailmap
+++ b/.mailmap
@@ -349,6 +349,7 @@ shelarcy                <shelarcy@gmail.com>                # Uses this name onl
 Ömer Sinan Ağacan       <omeragacan@gmail.com>              <omer@well-typed.com>
 
 # Note [geoffw]
+# ~~~~~~~~~~~~~
 # From GHC wiki: "Geoff Washburn made the first implementation of GADTs in GHC"
 # Other possibilities (but not such a direct connection with GHC):
 #  * Geoff W. Hamilton
@@ -357,14 +358,16 @@ shelarcy                <shelarcy@gmail.com>                # Uses this name onl
 #    PhD student, OCaml, "Dynamic ADTs"
 #
 # Note [uid245]
+# ~~~~~~~~~~~~~
 # Circumstantial evidence only:
 # * Commit fafe43, "Avoid divide by zero", by simonm.
 # * Subsequent commit fd40a1, "avoid another divide by zero", by uid245.
 # * Three commits later 15e6ea, "urk, extra parenthesis crept in", by simonm. Same file section.
 #
 # Note [usrbincc]
+# ~~~~~~~~~~~~~~~
 # https://github.com/thlorenz/node-traceur/commit/59f97feae23763c456b70bb129dbe04004e9fe04
 #
 # Note [zhuang]
+# ~~~~~~~~~~~~~
 # http://comments.gmane.org/gmane.comp.lang.haskell.cvs.ghc/33473
-#EOF
diff --git a/Makefile b/Makefile
index cf15d1c08687b84a16cea450031fc389b3e4f8d8..389bfd32f865413d0577a947819a26f9e4342aa0 100644
--- a/Makefile
+++ b/Makefile
@@ -51,7 +51,7 @@ install show:
 	$(MAKE) --no-print-directory -f ghc.mk $@ BINDIST=YES NO_INCLUDE_DEPS=YES
 
 # Note [install-strip]
-#
+# ~~~~~~~~~~~~~~~~~~~~
 # install-strip is like install, but it strips the executable files while
 # installing them.
 #
@@ -191,7 +191,7 @@ endif
 endif
 
 # Note [validate and testsuite speed]
-#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # There are 3 different validate and testsuite speed settings:
 # fast, normal and slow.
 #
diff --git a/compiler/CodeGen.Platform.h b/compiler/CodeGen.Platform.h
index ceccc38620c645b7b75a6b71606a4276afad3a74..346dd0af0361e79559c8b74f5cd302f4fc553b7d 100644
--- a/compiler/CodeGen.Platform.h
+++ b/compiler/CodeGen.Platform.h
@@ -834,7 +834,7 @@ freeReg :: RegNo -> Bool
 
 # if defined(MACHREGS_i386)
 freeReg esp = False -- %esp is the C stack pointer
-freeReg esi = False -- Note [esi/edi/ebp not allocatable]
+freeReg esi = False -- See Note [esi/edi/ebp not allocatable]
 freeReg edi = False
 freeReg ebp = False
 # endif
@@ -844,7 +844,7 @@ freeReg rsp = False  --        %rsp is the C stack pointer
 
 {-
 Note [esi/edi/ebp not allocatable]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 %esi is mapped to R1, so %esi would normally be allocatable while it
 is not being used for R1.  However, %esi has no 8-bit version on x86,
 and the linear register allocator is not sophisticated enough to
diff --git a/compiler/GHC/Builtin/Types.hs b/compiler/GHC/Builtin/Types.hs
index 2096e27a2b9167dc029e8cda398bb3d27fec40d4..347afad5c032af068de2da0e9cf500807ca7fde8 100644
--- a/compiler/GHC/Builtin/Types.hs
+++ b/compiler/GHC/Builtin/Types.hs
@@ -762,7 +762,7 @@ constraintKind   = mkTyConTy constraintKindTyCon
 *                                                                      *
 ************************************************************************
 
-Note [How tuples work]  See also Note [Known-key names] in GHC.Builtin.Names
+Note [How tuples work]
 ~~~~~~~~~~~~~~~~~~~~~~
 * There are three families of tuple TyCons and corresponding
   DataCons, expressed by the type BasicTypes.TupleSort:
@@ -814,6 +814,8 @@ Note [How tuples work]  See also Note [Known-key names] in GHC.Builtin.Names
   deserialization we lookup the Name associated with the unique with the logic
   in GHC.Builtin.Uniques. See Note [Symbol table representation of names] for details.
 
+See also Note [Known-key names] in GHC.Builtin.Names.
+
 Note [One-tuples]
 ~~~~~~~~~~~~~~~~~
 GHC supports both boxed and unboxed one-tuples:
diff --git a/compiler/GHC/Builtin/Uniques.hs b/compiler/GHC/Builtin/Uniques.hs
index dc70ce3f5c01148a1be4c65ab340a31d80bfb26b..acf835c996b40bac37b090dffabc096559b0599b 100644
--- a/compiler/GHC/Builtin/Uniques.hs
+++ b/compiler/GHC/Builtin/Uniques.hs
@@ -143,7 +143,6 @@ getUnboxedSumName n
 
 -- Note [Uniques for tuple type and data constructors]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- Wired-in type constructor keys occupy *two* slots:
 --    * u: the TyCon itself
 --    * u+1: the TyConRepName of the TyCon
@@ -156,7 +155,6 @@ getUnboxedSumName n
 {-
 Note [Unique layout for constraint tuple selectors]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 Constraint tuples, like boxed and unboxed tuples, have their type and data
 constructor Uniques wired in (see
 Note [Uniques for tuple type and data constructors]). Constraint tuples are
diff --git a/compiler/GHC/Builtin/primops.txt.pp b/compiler/GHC/Builtin/primops.txt.pp
index 32e185e3a9fee05615af038d02ff8f29bfb944bf..772371235e8278247aaa7d663f56fcd4cb9d77ea 100644
--- a/compiler/GHC/Builtin/primops.txt.pp
+++ b/compiler/GHC/Builtin/primops.txt.pp
@@ -149,7 +149,6 @@ defaults
 
 -- Note [When do out-of-line primops go in primops.txt.pp]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- Out of line primops are those with a C-- implementation. But that
 -- doesn't mean they *just* have an C-- implementation. As mentioned in
 -- Note [Inlining out-of-line primops and heap checks], some out-of-line
@@ -2437,7 +2436,6 @@ primop  WriteMutVarOp "writeMutVar#"  GenPrimOp
 
 -- Note [Why not an unboxed tuple in atomicModifyMutVar2#?]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- Looking at the type of atomicModifyMutVar2#, one might wonder why
 -- it doesn't return an unboxed tuple. e.g.,
 --
@@ -3173,7 +3171,6 @@ primop  ReallyUnsafePtrEqualityOp "reallyUnsafePtrEquality#" GenPrimOp
 
 -- Note [reallyUnsafePtrEquality# can_fail]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- reallyUnsafePtrEquality# can't actually fail, per se, but we mark it
 -- can_fail anyway. Until 5a9a1738023a, GHC considered primops okay for
 -- speculation only when their arguments were known to be forced. This was
@@ -3264,7 +3261,7 @@ primop  DataToTagOp "dataToTag#" GenPrimOp
    a -> Int#  -- Zero-indexed; the first constructor has tag zero
    with
    strictness = { \ _arity -> mkClosedDmdSig [evalDmd] topDiv }
-   -- See Note [dataToTag# magic] in GHC.Core.Op.ConstantFold
+   -- See Note [dataToTag# magic] in GHC.Core.Opt.ConstantFold
 
 primop  TagToEnumOp "tagToEnum#" GenPrimOp
    Int# -> a
diff --git a/compiler/GHC/ByteCode/Types.hs b/compiler/GHC/ByteCode/Types.hs
index ed1bd9bf138bf41f52293713e2e2ba55e11a7ceb..6eb661ac1898b03068d93a21149d74e5168ab895 100644
--- a/compiler/GHC/ByteCode/Types.hs
+++ b/compiler/GHC/ByteCode/Types.hs
@@ -84,7 +84,6 @@ newtype RegBitmap = RegBitmap { unRegBitmap :: Word32 }
 
 {- Note [GHCi TupleInfo]
 ~~~~~~~~~~~~~~~~~~~~~~~~
-
    This contains the data we need for passing unboxed tuples between
    bytecode and native code
 
diff --git a/compiler/GHC/Cmm/CLabel.hs b/compiler/GHC/Cmm/CLabel.hs
index fd9f019e04b49859542b16ca84d1a17ae5da477b..3acace8be209d9ff200f3c43a9e869424501c816 100644
--- a/compiler/GHC/Cmm/CLabel.hs
+++ b/compiler/GHC/Cmm/CLabel.hs
@@ -478,7 +478,7 @@ data IdLabelInfo
                         -- Note [Bytes label].
   | BlockInfoTable      -- ^ Like LocalInfoTable but for a proc-point block
                         -- instead of a closure entry-point.
-                        -- See Note [Proc-point local block entry-point].
+                        -- See Note [Proc-point local block entry-points].
 
   deriving (Eq, Ord)
 
@@ -587,7 +587,7 @@ mkBytesLabel name                 = IdLabel name NoCafRefs Bytes
 
 mkBlockInfoTableLabel :: Name -> CafInfo -> CLabel
 mkBlockInfoTableLabel name c = IdLabel name c BlockInfoTable
-                               -- See Note [Proc-point local block entry-point].
+                               -- See Note [Proc-point local block entry-points].
 
 -- Constructing Cmm Labels
 mkDirty_MUT_VAR_Label,
@@ -865,7 +865,7 @@ toEntryLbl platform lbl = case lbl of
    IdLabel n c (ConInfoTable k)  -> IdLabel n c (ConEntry k)
 
    IdLabel n _ BlockInfoTable    -> mkLocalBlockLabel (nameUnique n)
-                   -- See Note [Proc-point local block entry-point].
+                   -- See Note [Proc-point local block entry-points].
    IdLabel n c _                 -> IdLabel n c Entry
    CmmLabel m ext str CmmInfo    -> CmmLabel m ext str CmmEntry
    CmmLabel m ext str CmmRetInfo -> CmmLabel m ext str CmmRet
@@ -898,7 +898,6 @@ hasCAF _                            = False
 
 -- Note [ticky for LNE]
 -- ~~~~~~~~~~~~~~~~~~~~~
-
 -- Until 14 Feb 2013, every ticky counter was associated with a
 -- closure. Thus, ticky labels used IdLabel. It is odd that
 -- GHC.Cmm.Info.Build.cafTransfers would consider such a ticky label
@@ -1465,7 +1464,6 @@ pprCLabel !platform !sty lbl = -- see Note [Bangs in CLabel]
 
 -- Note [Internal proc labels]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- Some tools (e.g. the `perf` utility on Linux) rely on the symbol table
 -- for resolution of function names. To help these tools we provide the
 -- (enabled by default) -fexpose-all-symbols flag which causes GHC to produce
diff --git a/compiler/GHC/Cmm/ContFlowOpt.hs b/compiler/GHC/Cmm/ContFlowOpt.hs
index 73c13d204082942618c78fe9756fc887de02b2b6..350f94c818b532186604a3783d2223ebcabc6216 100644
--- a/compiler/GHC/Cmm/ContFlowOpt.hs
+++ b/compiler/GHC/Cmm/ContFlowOpt.hs
@@ -29,7 +29,6 @@ import Control.Monad
 
 -- Note [What is shortcutting]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- Consider this Cmm code:
 --
 -- L1: ...
@@ -53,7 +52,6 @@ import Control.Monad
 
 -- Note [Control-flow optimisations]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- This optimisation does three things:
 --
 --   - If a block finishes in an unconditional branch to another block
@@ -80,7 +78,6 @@ import Control.Monad
 
 -- Note [Shortcut call returns]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- We are going to maintain the "current" graph (LabelMap CmmBlock) as
 -- we go, and also a mapping from BlockId to BlockId, representing
 -- continuation labels that we have renamed.  This latter mapping is
@@ -106,7 +103,6 @@ import Control.Monad
 
 -- Note [Shortcut call returns and proc-points]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- Consider this code that you might get from a recursive
 -- let-no-escape:
 --
diff --git a/compiler/GHC/Cmm/Dataflow.hs b/compiler/GHC/Cmm/Dataflow.hs
index 3e310fefcb104d67411d3e4592a07cfba86c6dbb..ad1c37ace22ec1821b62be3e014b538659ccc750 100644
--- a/compiler/GHC/Cmm/Dataflow.hs
+++ b/compiler/GHC/Cmm/Dataflow.hs
@@ -294,7 +294,7 @@ sortBlocks direction entry blockmap =
     fwd = revPostorderFrom blockmap entry
 
 -- Note [Backward vs forward analysis]
---
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- The forward and backward cases are not dual.  In the forward case, the entry
 -- points are known, and one simply traverses the body blocks from those points.
 -- In the backward case, something is known about the exit points, but a
@@ -350,7 +350,7 @@ updateFact
 updateFact fact_join dep_blocks (todo, fbase) lbl new_fact
   = case lookupFact lbl fbase of
       Nothing ->
-          -- Note [No old fact]
+          -- See Note [No old fact]
           let !z = mapInsert lbl new_fact fbase in (changed, z)
       Just old_fact ->
           case fact_join (OldFact old_fact) (NewFact new_fact) of
@@ -362,7 +362,7 @@ updateFact fact_join dep_blocks (todo, fbase) lbl new_fact
 
 {-
 Note [No old fact]
-
+~~~~~~~~~~~~~~~~~~
 We know that the new_fact is >= _|_, so we don't need to join.  However,
 if the new fact is also _|_, and we have already analysed its block,
 we don't need to record a change.  So there's a tradeoff here.  It turns
diff --git a/compiler/GHC/Cmm/Expr.hs b/compiler/GHC/Cmm/Expr.hs
index 52cb63c9018b6160e989b7cd80f4992212cf0489..f63ef62dab3dc0d65002b9055ed7687b4ec53350 100644
--- a/compiler/GHC/Cmm/Expr.hs
+++ b/compiler/GHC/Cmm/Expr.hs
@@ -86,7 +86,7 @@ data CmmReg
 data Area
   = Old            -- See Note [Old Area]
   | Young {-# UNPACK #-} !BlockId  -- Invariant: must be a continuation BlockId
-                   -- See Note [Continuation BlockId] in GHC.Cmm.Node.
+                   -- See Note [Continuation BlockIds] in GHC.Cmm.Node.
   deriving (Eq, Ord, Show)
 
 {- Note [Old Area]
@@ -203,7 +203,7 @@ data CmmLit
 
   | CmmBlock {-# UNPACK #-} !BlockId     -- Code label
         -- Invariant: must be a continuation BlockId
-        -- See Note [Continuation BlockId] in GHC.Cmm.Node.
+        -- See Note [Continuation BlockIds] in GHC.Cmm.Node.
 
   | CmmHighStackMark -- A late-bound constant that stands for the max
                      -- #bytes of stack space used during a procedure.
@@ -410,7 +410,7 @@ data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show )
 -----------------------------------------------------------------------------
 {-
 Note [Overlapping global registers]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The backend might not faithfully implement the abstraction of the STG
 machine with independent registers for different values of type
 GlobalReg. Specifically, certain pairs of registers (r1, r2) may
diff --git a/compiler/GHC/Cmm/Graph.hs b/compiler/GHC/Cmm/Graph.hs
index ef8ae7f26b532148b454805cdab933684a2b08c3..ff9391a7fe4460f78b59ee1b83bf19eefec28d02 100644
--- a/compiler/GHC/Cmm/Graph.hs
+++ b/compiler/GHC/Cmm/Graph.hs
@@ -425,7 +425,7 @@ copyOutOflow profile conv transfer area actuals updfr_off extra_stack_stuff
 
 
 -- Note [Width of parameters]
---
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- Consider passing a small (< word width) primitive like Int8# to a function.
 -- It's actually non-trivial to do this without extending/narrowing:
 -- * Global registers are considered to have native word width (i.e., 64-bits on
diff --git a/compiler/GHC/Cmm/Info/Build.hs b/compiler/GHC/Cmm/Info/Build.hs
index 01f3c2a3ff5f728efd11d9b61ba98c7aac4d22a7..571a1faae7af38f8b54a8d98e631cc35cb40b793 100644
--- a/compiler/GHC/Cmm/Info/Build.hs
+++ b/compiler/GHC/Cmm/Info/Build.hs
@@ -55,7 +55,6 @@ import GHC.Types.Name.Set
 
 {- Note [SRTs]
    ~~~~~~~~~~~
-
 SRTs are the mechanism by which the garbage collector can determine
 the live CAFs in the program.
 
@@ -925,7 +924,7 @@ doSCC cfg staticFuns static_data (CyclicSCC nodes) = do
 
 
 {- Note [recursive SRTs]
-
+   ~~~~~~~~~~~~~~~~~~~~~
 If the dependency analyser has found us a recursive group of
 declarations, then we build a single SRT for the whole group, on the
 grounds that everything in the group is reachable from everything
diff --git a/compiler/GHC/Cmm/LayoutStack.hs b/compiler/GHC/Cmm/LayoutStack.hs
index ad13e8f4311569ad7374fc048050df8c0b935476..1bd00ed65a1a6ef38ea939fe7fc56bebb52668e6 100644
--- a/compiler/GHC/Cmm/LayoutStack.hs
+++ b/compiler/GHC/Cmm/LayoutStack.hs
@@ -39,7 +39,7 @@ import Data.Array as Array
 import Data.List (nub)
 
 {- Note [Stack Layout]
-
+   ~~~~~~~~~~~~~~~~~~~
 The job of this pass is to
 
  - replace references to abstract stack Areas with fixed offsets from Sp.
@@ -141,7 +141,7 @@ Pass 2:
 
 
 Note [Two pass approach]
-
+~~~~~~~~~~~~~~~~~~~~~~~~
 The main reason for Pass 2 is being able to insert only the reloads that are
 needed and the fact that the two passes need different liveness information.
 Let's consider an example:
@@ -510,7 +510,7 @@ handleLastNode cfg procpoints liveness cont_info stackmaps
                                 , LabelMap StackMap )
 
      handleBranches
-         -- Note [diamond proc point]
+         -- See Note [diamond proc point]
        | Just l <- futureContinuation middle
        , (nub $ filter (`setMember` procpoints) $ successors last) == [l]
        = do
@@ -644,9 +644,8 @@ setupStackFrame platform lbl liveness updfr_off ret_args stack0
                          }
 
 
--- -----------------------------------------------------------------------------
 -- Note [diamond proc point]
---
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~
 -- This special case looks for the pattern we get from a typical
 -- tagged case expression:
 --
@@ -895,7 +894,7 @@ maybeAddSpAdj cfg sp0 sp_off block =
       where sp_unwind = CmmRegOff spReg (sp0 - platformWordSizeInBytes platform - sp_off)
 
 {- Note [SP old/young offsets]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Sp(L) is the Sp offset on entry to block L relative to the base of the
 OLD area.
 
@@ -1098,7 +1097,7 @@ insertReloads platform stackmap live =
 
 {-
 Note [Lower safe foreign calls]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We start with
 
    Sp[young(L1)] = L1
diff --git a/compiler/GHC/Cmm/MachOp.hs b/compiler/GHC/Cmm/MachOp.hs
index cd2d331a58d509c10980e4226b76637ff549bbf0..0bd3ac1111ea1083759f7e89afe97e7db852916f 100644
--- a/compiler/GHC/Cmm/MachOp.hs
+++ b/compiler/GHC/Cmm/MachOp.hs
@@ -340,9 +340,8 @@ isFloatComparison mop =
     MO_F_Lt {} -> True
     _other     -> False
 
--- -----------------------------------------------------------------------------
--- Inverting conditions
-
+-- Note [Inverting conditions]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- Sometimes it's useful to be able to invert the sense of a
 -- condition.  Not all conditional tests are invertible: in
 -- particular, floating point conditionals cannot be inverted, because
diff --git a/compiler/GHC/Cmm/Node.hs b/compiler/GHC/Cmm/Node.hs
index fe6eac3223285d48019e41e15d648bb751114759..d7d35a8bfc3c2a3d1b42b084ae1ef9460ec08445 100644
--- a/compiler/GHC/Cmm/Node.hs
+++ b/compiler/GHC/Cmm/Node.hs
@@ -105,7 +105,7 @@ data CmmNode e x where
 
   CmmSwitch
     :: CmmExpr       -- Scrutinee, of some integral type
-    -> SwitchTargets -- Cases. See [Note SwitchTargets]
+    -> SwitchTargets -- Cases. See Note [SwitchTargets] in GHC.Cmm.Switch
     -> CmmNode O C
 
   CmmCall :: {                -- A native call or tail call
@@ -114,7 +114,9 @@ data CmmNode e x where
       cml_cont :: Maybe Label,
           -- Label of continuation (Nothing for return or tail call)
           --
-          -- Note [Continuation BlockIds]: these BlockIds are called
+          -- Note [Continuation BlockIds]
+          -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+          -- These BlockIds are called
           -- Continuation BlockIds, and are the only BlockIds that can
           -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or
           -- (CmmStackSlot (Young b) _).
@@ -196,7 +198,6 @@ sequence.
 
 {- Note [Unsafe foreign calls clobber caller-save registers]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 A foreign call is defined to clobber any GlobalRegs that are mapped to
 caller-saves machine registers (according to the prevailing C ABI).
 GHC.StgToCmm.Utils.callerSaves tells you which GlobalRegs are caller-saves.
@@ -386,7 +387,6 @@ instance DefinerOfRegs GlobalReg (CmmNode e x) where
 
 -- Note [Safe foreign calls clobber STG registers]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- During stack layout phase every safe foreign call is expanded into a block
 -- that contains unsafe foreign call (instead of safe foreign call) and ends
 -- with a normal call (See Note [Foreign calls]). This means that we must
@@ -642,8 +642,8 @@ data CmmTickScope
     -- the new block could have a combined tick scope a/c+b/d, which
     -- both tick<2> and tick<3> apply to.
 
--- Note [CmmTick scoping details]:
---
+-- Note [CmmTick scoping details]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- The scope of a @CmmTick@ is given by the @CmmEntry@ node of the
 -- same block. Note that as a result of this, optimisations making
 -- tick scopes more specific can *reduce* the amount of code a tick
diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y
index ed9492aa329c0fbc4c51bf22ce3e3ea1f4f03acb..68d58213090fbb5d2ed2a62be9e09b0608b3497e 100644
--- a/compiler/GHC/Cmm/Parser.y
+++ b/compiler/GHC/Cmm/Parser.y
@@ -6,9 +6,9 @@
 --
 -----------------------------------------------------------------------------
 
-{- -----------------------------------------------------------------------------
+{-
 Note [Syntax of .cmm files]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
 NOTE: You are very much on your own in .cmm.  There is very little
 error checking at all:
 
diff --git a/compiler/GHC/Cmm/Pipeline.hs b/compiler/GHC/Cmm/Pipeline.hs
index 270a28146147a5412bad1fbda0b218192b32d4ae..585606fcb20d60d0a9c8804f52444d40e74fa58e 100644
--- a/compiler/GHC/Cmm/Pipeline.hs
+++ b/compiler/GHC/Cmm/Pipeline.hs
@@ -175,7 +175,6 @@ cpsTop logger platform cfg proc =
 
 -- Note [Sinking after stack layout]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- In the past we considered running sinking pass also before stack
 -- layout, but after making some measurements we realized that:
 --
@@ -301,7 +300,7 @@ cpsTop logger platform cfg proc =
 --
 
 {- Note [inconsistent-pic-reg]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 On x86/Darwin, PIC is implemented by inserting a sequence like
 
     call 1f
@@ -329,7 +328,7 @@ _GLOBAL_OFFSET_TABLE_, regardless of which entry point we arrived via.
 -}
 
 {- Note [unreachable blocks]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~
 The control-flow optimiser sometimes leaves unreachable blocks behind
 containing junk code.  These aren't necessarily a problem, but
 removing them is good because it might save time in the native code
diff --git a/compiler/GHC/Cmm/ProcPoint.hs b/compiler/GHC/Cmm/ProcPoint.hs
index 0cabea153692d99f343d9dc1140a99c60f4d372b..cd55b4d2554fad937e7761b116310967f487bb51 100644
--- a/compiler/GHC/Cmm/ProcPoint.hs
+++ b/compiler/GHC/Cmm/ProcPoint.hs
@@ -428,7 +428,7 @@ attachContInfoTables _ other_decl
 
 {-
 Note [Direct reachability]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 Block B is directly reachable from proc point P iff control can flow
 from P to B without passing through an intervening proc point.
 -}
@@ -437,7 +437,7 @@ from P to B without passing through an intervening proc point.
 
 {-
 Note [No simple dataflow]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~
 Sadly, it seems impossible to compute the proc points using a single
 dataflow pass.  One might attempt to use this simple lattice:
 
diff --git a/compiler/GHC/Cmm/Sink.hs b/compiler/GHC/Cmm/Sink.hs
index 7d909671327e971380de41f3ec4efd460733b7b4..0f3d97971689985049015ebe3da843dd6b145276 100644
--- a/compiler/GHC/Cmm/Sink.hs
+++ b/compiler/GHC/Cmm/Sink.hs
@@ -472,7 +472,7 @@ tryToInline platform liveAfter node assigs =
 
   go usages live node skipped (a@(l,rhs,_) : rest)
    | cannot_inline            = dont_inline
-   | occurs_none              = discard  -- Note [discard during inlining]
+   | occurs_none              = discard  -- See Note [discard during inlining]
    | occurs_once              = inline_and_discard
    | isTrivial platform rhs   = inline_and_keep
    | otherwise                = dont_inline
@@ -496,7 +496,7 @@ tryToInline platform liveAfter node assigs =
                 live' = inline foldLocalRegsUsed platform (\m r -> insertLRegSet r m)
                                             live rhs
 
-        cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments]
+        cannot_inline = skipped `regsUsedIn` rhs -- See Note [dependent assignments]
                         || l `elemLRegSet` skipped
                         || not (okToInline platform rhs node)
 
@@ -519,8 +519,7 @@ tryToInline platform liveAfter node assigs =
         inl_exp other = other
 
 {- Note [Keeping assignemnts mentioned in skipped RHSs]
-    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     If we have to assignments: [z = y, y = e1] and we skip
     z we *must* retain the assignment y = e1. This is because
     we might inline "z = y" into another node later on so we
@@ -541,7 +540,7 @@ tryToInline platform liveAfter node assigs =
 -}
 
 {- Note [improveConditional]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~
 cmmMachOpFold tries to simplify conditionals to turn things like
   (a == b) != 1
 into
@@ -579,7 +578,6 @@ improveConditional other = other
 
 -- Note [dependent assignments]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- If our assignment list looks like
 --
 --    [ y = e,  x = ... y ... ]
@@ -690,7 +688,6 @@ conflicts platform (r, rhs, addr) node
 
 {- Note [Inlining foldRegsDefd]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
    foldRegsDefd is, after optimization, *not* a small function so
    it's only marked INLINEABLE, but not INLINE.
 
@@ -720,7 +717,6 @@ localRegistersConflict platform expr node =
 
 -- Note [Sinking and calls]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- We have three kinds of calls: normal (CmmCall), safe foreign (CmmForeignCall)
 -- and unsafe foreign (CmmUnsafeForeignCall). We perform sinking pass after
 -- stack layout (see Note [Sinking after stack layout]) which leads to two
@@ -803,7 +799,6 @@ data AbsMem
 
 -- Note [Foreign calls clobber heap]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- It is tempting to say that foreign calls clobber only
 -- non-heap/stack memory, but unfortunately we break this invariant in
 -- the RTS.  For example, in stg_catch_retry_frame we call
diff --git a/compiler/GHC/Cmm/Switch.hs b/compiler/GHC/Cmm/Switch.hs
index 7bef1e293a0defd725ae84cafe72656a8d882a92..f8c6c674ef9d3373ea1942a83d5b4826f90cd30b 100644
--- a/compiler/GHC/Cmm/Switch.hs
+++ b/compiler/GHC/Cmm/Switch.hs
@@ -26,7 +26,6 @@ import qualified Data.Map as M
 
 -- Note [Cmm Switches, the general plan]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- Compiling a high-level switch statement, as it comes out of a STG case
 -- expression, for example, allows for a surprising amount of design decisions.
 -- Therefore, we cleanly separated this from the Stg → Cmm transformation, as
@@ -51,10 +50,9 @@ import qualified Data.Map as M
 -- See Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement] why the two module are
 -- separated.
 
------------------------------------------------------------------------------
+
 -- Note [Magic Constants in GHC.Cmm.Switch]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- There are a lot of heuristics here that depend on magic values where it is
 -- hard to determine the "best" value (for whatever that means). These are the
 -- magic values:
@@ -83,7 +81,6 @@ minJumpTableOffset = 2
 
 -- Note [SwitchTargets]
 -- ~~~~~~~~~~~~~~~~~~~~
---
 -- The branches of a switch are stored in a SwitchTargets, which consists of an
 -- (optional) default jump target, and a map from values to jump targets.
 --
@@ -175,7 +172,6 @@ switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches)
 
 -- Note [Jump Table Offset]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- Usually, the code for a jump table starting at x will first subtract x from
 -- the value, to avoid a large amount of empty entries. But if x is very small,
 -- the extra entries are no worse than the subtraction in terms of code size, and
@@ -239,7 +235,6 @@ data SwitchPlan
 --
 -- Note [createSwitchPlan]
 -- ~~~~~~~~~~~~~~~~~~~~~~~
---
 -- A SwitchPlan describes how a Switch statement is to be broken down into
 -- smaller pieces suitable for code generation.
 --
diff --git a/compiler/GHC/Cmm/Switch/Implement.hs b/compiler/GHC/Cmm/Switch/Implement.hs
index 87dfc1cdaaccf2be012338a4c840d920e80c7248..30265dc234b1aa99a2a53d84ac91197732828117 100644
--- a/compiler/GHC/Cmm/Switch/Implement.hs
+++ b/compiler/GHC/Cmm/Switch/Implement.hs
@@ -57,16 +57,15 @@ visitSwitches platform block
 
 -- Note [Floating switch expressions]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 -- When we translate a sparse switch into a search tree we would like
 -- to compute the value we compare against only once.
-
+--
 -- For this purpose we assign the switch expression to a local register
 -- and then use this register when constructing the actual binary tree.
-
+--
 -- This is important as the expression could contain expensive code like
 -- memory loads or divisions which we REALLY don't want to duplicate.
-
+--
 -- This happened in parts of the handwritten RTS Cmm code. See also #16933
 
 -- See Note [Floating switch expressions]
diff --git a/compiler/GHC/CmmToAsm.hs b/compiler/GHC/CmmToAsm.hs
index e7a392d822659e6f72b0cc410bef47c27073ce84..88c72f6b165df5b18e3726317006a28bcfba6394 100644
--- a/compiler/GHC/CmmToAsm.hs
+++ b/compiler/GHC/CmmToAsm.hs
@@ -192,7 +192,6 @@ data NativeGenAcc statics instr
 {-
 Note [Unwinding information in the NCG]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 Unwind information is a type of metadata which allows a debugging tool
 to reconstruct the values of machine registers at the time a procedure was
 entered. For the most part, the production of unwind information is handled by
diff --git a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
index 65872c73beff483a8c31e17ac1fc1153baa6ebfd..2698e6f17f19aa4352a9add74ebf5b19dbaab280 100644
--- a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
@@ -62,6 +62,7 @@ import GHC.Utils.Misc
 import GHC.Utils.Panic
 
 -- Note [General layout of an NCG]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- @cmmTopCodeGen@ will be our main entry point to code gen.  Here we'll get
 -- @RawCmmDecl@; see GHC.Cmm
 --
@@ -846,7 +847,7 @@ getRegister' config plat expr
         MO_Sub w -> intOp False w (\d x y -> unitOL $ annExpr expr (SUB d x y))
 
         -- Note [CSET]
-        --
+        -- ~~~~~~~~~~~
         -- Setting conditional flags: the architecture internally knows the
         -- following flag bits.  And based on thsoe comparisons as in the
         -- table below.
diff --git a/compiler/GHC/CmmToAsm/AArch64/Ppr.hs b/compiler/GHC/CmmToAsm/AArch64/Ppr.hs
index 8d93a56395eacae8467f238833ab97617b017530..5a48241c0b4f07d2365b8b03c97e1945d81f08aa 100644
--- a/compiler/GHC/CmmToAsm/AArch64/Ppr.hs
+++ b/compiler/GHC/CmmToAsm/AArch64/Ppr.hs
@@ -147,7 +147,7 @@ pprBasicBlock config info_env (BasicBlock blockid instrs)
              then ppr (mkAsmTempEndLabel info_lbl) <> char ':'
              else empty)
     -- Make sure the info table has the right .loc for the block
-    -- coming right after it. See [Note: Info Offset]
+    -- coming right after it. See Note [Info Offset]
     infoTableLoc = case instrs of
       (l@LOCATION{} : _) -> pprInstr platform l
       _other             -> empty
@@ -187,11 +187,12 @@ pprGloblDecl platform lbl
   | otherwise = text "\t.globl " <> pdoc platform lbl
 
 -- Note [Always use objects for info tables]
--- See discussion in X86.Ppr
--- for why this is necessary.  Essentially we need to ensure that we never
--- pass function symbols when we migth want to lookup the info table.  If we
--- did, we could end up with procedure linking tables (PLT)s, and thus the
--- lookup wouldn't point to the function, but into the jump table.
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- See discussion in X86.Ppr for why this is necessary.  Essentially we need to
+-- ensure that we never pass function symbols when we might want to look up the
+-- info table.  If we did, we could end up with procedure linking tables
+-- (PLT)s, and thus the lookup wouldn't point to the function, but into the
+-- jump table.
 --
 -- Fun fact: The LLVMMangler exists to patch this issue su on the LLVM side as
 -- well.
diff --git a/compiler/GHC/CmmToAsm/BlockLayout.hs b/compiler/GHC/CmmToAsm/BlockLayout.hs
index 70e131c717052f97f2da97a0a4f72b8321751cd6..747702658e61f6f5ad2e99b50284bee94c5394c6 100644
--- a/compiler/GHC/CmmToAsm/BlockLayout.hs
+++ b/compiler/GHC/CmmToAsm/BlockLayout.hs
@@ -67,10 +67,9 @@ import GHC.Data.UnionFind
   * Feed this CFG into the block layout code (`sequenceTop`) in this
     module. Which will then produce a code layout based on the input weights.
 
-  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-  ~~~ Note [Chain based CFG serialization]
-  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+  Note [Chain based CFG serialization]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   For additional information also look at
   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/code-layout
 
@@ -189,10 +188,9 @@ import GHC.Data.UnionFind
   While E does not follow X it's still beneficial to place them near each other.
   This can be advantageous if eg C,X,E will end up in the same cache line.
 
-  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-  ~~~ Note [Triangle Control Flow]
-  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+  Note [Triangle Control Flow]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   Checking if an argument is already evaluated leads to a somewhat
   special case  which looks like this:
 
@@ -240,10 +238,9 @@ import GHC.Data.UnionFind
   Assuming that Lwork is large the chance that the "call" ends up
   in the same cache line is also fairly small.
 
-  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-  ~~~ Note [Layout relevant edge weights]
-  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+  Note [Layout relevant edge weights]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   The input to the chain based code layout algorithm is a CFG
   with edges annotated with their frequency. The frequency
   of traversal corresponds quite well to the cost of not placing
@@ -373,9 +370,9 @@ takeL :: Int -> BlockChain -> [BlockId]
 takeL n (BlockChain blks) =
     take n . fromOL $ blks
 
+
 -- Note [Combining neighborhood chains]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 -- See also Note [Chain based CFG serialization]
 -- We have the chains (A-B-C-D) and (E-F) and an Edge C->E.
 --
diff --git a/compiler/GHC/CmmToAsm/CFG.hs b/compiler/GHC/CmmToAsm/CFG.hs
index 58041fef2c953b71b1da82213b9555017576a718..0a662d7ff93b6daa70f2d35e5b044b5d901af4d7 100644
--- a/compiler/GHC/CmmToAsm/CFG.hs
+++ b/compiler/GHC/CmmToAsm/CFG.hs
@@ -150,7 +150,7 @@ instance Outputable CfgEdge where
 -- or has it been introduced during assembly codegen. We use this to maintain
 -- some information which would otherwise be lost during the
 -- Cmm \<-> asm transition.
--- See also Note [Inverting Conditional Branches]
+-- See also Note [Inverting conditions]
 data TransitionSource
   = CmmSource { trans_cmmNode :: (CmmNode O C)
               , trans_info :: BranchInfo }
@@ -248,7 +248,7 @@ filterEdges f cfg =
 
 
 {- Note [Updating the CFG during shortcutting]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 See Note [What is shortcutting] in the control flow optimization
 code (GHC.Cmm.ContFlowOpt) for a slightly more in depth explanation on shortcutting.
 
@@ -1013,7 +1013,6 @@ mkGlobalWeights root localCfg
 
 {- Note [Static Branch Prediction]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 The work here has been based on the paper
 "Static Branch Prediction and Program Profile Analysis" by Y Wu, JR Larus.
 
diff --git a/compiler/GHC/CmmToAsm/Dwarf.hs b/compiler/GHC/CmmToAsm/Dwarf.hs
index fcff4be74e8d553dd5289a50700a82853f0d4d6a..07ca55d6d8421e21ce0f12678281728891b8bd95 100644
--- a/compiler/GHC/CmmToAsm/Dwarf.hs
+++ b/compiler/GHC/CmmToAsm/Dwarf.hs
@@ -148,7 +148,7 @@ debugSplitProcs b = concat $ H.mapElems $ mergeMaps $ map (split Nothing) b
 
 {-
 Note [Splitting DebugBlocks]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 DWARF requires that we break up the nested DebugBlocks produced from
 the C-- AST. For instance, we begin with tick trees containing nested procs.
 For example,
diff --git a/compiler/GHC/CmmToAsm/Dwarf/Types.hs b/compiler/GHC/CmmToAsm/Dwarf/Types.hs
index f8f0ae5c4436c3004b97e00321495fba8e59508d..e29f03e1d631d0355c41abdd5462cce3ad76446e 100644
--- a/compiler/GHC/CmmToAsm/Dwarf/Types.hs
+++ b/compiler/GHC/CmmToAsm/Dwarf/Types.hs
@@ -257,7 +257,7 @@ pprDwarfARanges platform arngs unitU =
 
 pprDwarfARange :: Platform -> DwarfARange -> SDoc
 pprDwarfARange platform arng =
-    -- Offset due to Note [Info offset].
+    -- Offset due to Note [Info Offset].
     pprWord platform (pdoc platform (dwArngStartLabel arng) <> text "-1")
     $$ pprWord platform length
   where
@@ -410,7 +410,6 @@ pprFrameBlock platform (DwarfFrameBlock hasInfo uws0) =
 
 -- Note [Info Offset]
 -- ~~~~~~~~~~~~~~~~~~
---
 -- GDB was pretty much written with C-like programs in mind, and as a
 -- result they assume that once you have a return address, it is a
 -- good idea to look at (PC-1) to unwind further - as that's where the
diff --git a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs
index 2184c0fc29e3cef882fcbef6e489fa573bc3ea4f..cd88a9f078cd96cb79fa25f293b9c2a48401e943 100644
--- a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs
@@ -738,6 +738,7 @@ temporary, then do the other computation, and then use the temporary:
 -}
 
 {- Note [Power instruction format]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 In some instructions the 16 bit offset must be a multiple of 4, i.e.
 the two least significant bits must be zero. The "Power ISA" specification
 calls these instruction formats "DS-FORM" and the instructions with
@@ -1210,6 +1211,7 @@ genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr]
                                       ]
 
 -- Note [Seemingly useless cmp and bne]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- In Power ISA, Book II, Section 4.4.1, Instruction Synchronize Instruction
 -- the second paragraph says that isync may complete before storage accesses
 -- "associated" with a preceding instruction have been performed. The cmp
@@ -2535,12 +2537,14 @@ coerceFP2Int' (ArchPPC_64 _) _ toRep x = do
 coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch"
 
 -- Note [.LCTOC1 in PPC PIC code]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
 -- to make the most of the PPC's 16-bit displacements.
 -- As 16-bit signed offset is used (usually via addi/lwz instructions)
 -- first element will have '-32768' offset against .LCTOC1.
 
 -- Note [implicit register in PPC PIC code]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- PPC generates calls by labels in assembly
 -- in form of:
 --     bl puts+32768@plt
diff --git a/compiler/GHC/CmmToAsm/Reg/Utils.hs b/compiler/GHC/CmmToAsm/Reg/Utils.hs
index 3a832963fea35a60cf301473b03f49918eba6d75..0a6bfabdbd51909daed0a06e11026f0b45d3198e 100644
--- a/compiler/GHC/CmmToAsm/Reg/Utils.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Utils.hs
@@ -4,7 +4,6 @@ where
 
 {- Note [UniqFM and the register allocator]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
    Before UniqFM had a key type the register allocator
    wasn't picky about key types, using VirtualReg, Reg
    and Unique at various use sites for the same map.
diff --git a/compiler/GHC/CmmToAsm/X86/CodeGen.hs b/compiler/GHC/CmmToAsm/X86/CodeGen.hs
index 52f2a521230d04a7f20ec6e9f349e8c419c48e57..028887a56fbfe0ff795c54d2629028f432ab112f 100644
--- a/compiler/GHC/CmmToAsm/X86/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/X86/CodeGen.hs
@@ -129,7 +129,6 @@ cmmTopCodeGen (CmmData sec dat) =
 
 {- Note [Verifying basic blocks]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
    We want to guarantee a few things about the results
    of instruction selection.
 
@@ -231,7 +230,6 @@ addSpUnwindings instr = return $ unitOL instr
 
 {- Note [Keeping track of the current block]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 When generating instructions for Cmm we sometimes require
 the current block for things like retry loops.
 
@@ -2906,7 +2904,7 @@ evalArgs bid actuals
     newLocalReg ty = LocalReg <$> getUniqueM <*> pure ty
 
 -- Note [DIV/IDIV for bytes]
---
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~
 -- IDIV reminder:
 --   Size    Dividend   Divisor   Quotient    Remainder
 --   byte    %ax         r/m8      %al          %ah
@@ -2990,7 +2988,7 @@ genCCall32' target dest_regs args = do
         let
             -- Align stack to 16n for calls, assuming a starting stack
             -- alignment of 16n - word_size on procedure entry. Which we
-            -- maintiain. See Note [rts/StgCRun.c : Stack Alignment on X86]
+            -- maintain. See Note [Stack Alignment on X86] in rts/StgCRun.c.
             sizes               = map (arg_size_bytes . cmmExprType platform) (reverse args)
             raw_arg_size        = sum sizes + platformWordSizeInBytes platform
             arg_pad_size        = (roundTo 16 $ raw_arg_size) - raw_arg_size
@@ -3605,10 +3603,8 @@ condIntReg cond x y = do
   return (Any II32 code)
 
 
------------------------------------------------------------
----          Note [SSE Parity Checks]                   ---
------------------------------------------------------------
-
+-- Note [SSE Parity Checks]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~
 -- We have to worry about unordered operands (eg. comparisons
 -- against NaN).  If the operands are unordered, the comparison
 -- sets the parity flag, carry flag and zero flag.
diff --git a/compiler/GHC/CmmToAsm/X86/Instr.hs b/compiler/GHC/CmmToAsm/X86/Instr.hs
index 947a25b2d83d337b356f2ffbb79ad6653dc54f8a..1f1515b0c93db5a0ef5f424419aad916b86c1e0e 100644
--- a/compiler/GHC/CmmToAsm/X86/Instr.hs
+++ b/compiler/GHC/CmmToAsm/X86/Instr.hs
@@ -120,7 +120,7 @@ Hence GLDZ and GLD1.  Bwahahahahahahaha!
 
 {-
 Note [x86 Floating point precision]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Intel's internal floating point registers are by default 80 bit
 extended precision.  This means that all operations done on values in
 registers are done at 80 bits, and unless the intermediate values are
@@ -795,6 +795,8 @@ mkJumpInstr id
         = [JXX ALWAYS id]
 
 -- Note [Windows stack layout]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 -- | On most OSes the kernel will place a guard page after the current stack
 --   page.  If you allocate larger than a page worth you may jump over this
 --   guard page.  Not only is this a security issue, but on certain OSes such
@@ -896,9 +898,8 @@ mkStackDeallocInstr platform amount
       _ -> panic "X86.mkStackDeallocInstr"
 
 
---
 -- Note [extra spill slots]
---
+-- ~~~~~~~~~~~~~~~~~~~~~~~~
 -- If the register allocator used more spill slots than we have
 -- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more
 -- C stack space on entry and exit from this proc.  Therefore we
diff --git a/compiler/GHC/CmmToAsm/X86/Ppr.hs b/compiler/GHC/CmmToAsm/X86/Ppr.hs
index 15e1b961df3fd8e3b76ee8741bfe7933fe157b99..49b6988c1d78ee8060af7ca2b27bb204a8ab106b 100644
--- a/compiler/GHC/CmmToAsm/X86/Ppr.hs
+++ b/compiler/GHC/CmmToAsm/X86/Ppr.hs
@@ -48,12 +48,8 @@ import GHC.Utils.Panic
 
 import Data.Word
 
--- -----------------------------------------------------------------------------
--- Printing this stuff out
---
---
 -- Note [Subsections Via Symbols]
---
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- If we are using the .subsections_via_symbols directive
 -- (available on recent versions of Darwin),
 -- we have to make sure that there is some kind of reference
@@ -163,7 +159,7 @@ pprBasicBlock config info_env (BasicBlock blockid instrs)
            ppWhen (ncgDwarfEnabled config) (pdoc platform (mkAsmTempEndLabel infoLbl) <> colon)
 
     -- Make sure the info table has the right .loc for the block
-    -- coming right after it. See [Note: Info Offset]
+    -- coming right after it. See Note [Info Offset]
     infoTableLoc = case instrs of
       (l@LOCATION{} : _) -> pprInstr platform l
       _other             -> empty
diff --git a/compiler/GHC/CmmToC.hs b/compiler/GHC/CmmToC.hs
index 6528f6392101fb92b738aea886f0f19eaa36786c..a6a036c290d5a977df4275e437eab0bc27b26c3e 100644
--- a/compiler/GHC/CmmToC.hs
+++ b/compiler/GHC/CmmToC.hs
@@ -167,6 +167,7 @@ pprAlignment words =
      text "__attribute__((aligned(" <> int (widthInBytes words) <> text ")))"
 
 -- Note [StgWord alignment]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~
 -- C codegen builds static closures as StgWord C arrays (pprWordArray).
 -- Their real C type is 'StgClosure'. Macros like UNTAG_CLOSURE assume
 -- pointers to 'StgClosure' are aligned at pointer size boundary:
diff --git a/compiler/GHC/CmmToLlvm/Base.hs b/compiler/GHC/CmmToLlvm/Base.hs
index b209c4cd67e3475943094b55e98199315a62548c..cc4377240be969193c196f666f35c126c4c3e97d 100644
--- a/compiler/GHC/CmmToLlvm/Base.hs
+++ b/compiler/GHC/CmmToLlvm/Base.hs
@@ -591,7 +591,7 @@ aliasify (LMGlobal var val) = do
            ]
 
 -- Note [Llvm Forward References]
---
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- The issue here is that LLVM insists on being strongly typed at
 -- every corner, so the first time we mention something, we have to
 -- settle what type we assign to it. That makes things awkward, as Cmm
diff --git a/compiler/GHC/CmmToLlvm/CodeGen.hs b/compiler/GHC/CmmToLlvm/CodeGen.hs
index 9e20b65a809171312a333fbe1bb5ecbc643d4dbb..a57a6f79f02d1720670f29485f32c188bab2b1f5 100644
--- a/compiler/GHC/CmmToLlvm/CodeGen.hs
+++ b/compiler/GHC/CmmToLlvm/CodeGen.hs
@@ -1255,7 +1255,6 @@ genExpectLit expLit expTy var = do
 
 {- Note [Literals and branch conditions]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 It is important that whenever we generate branch conditions for
 literals like '1', they are properly narrowed to an LLVM expression of
 type 'i1' (for bools.) Otherwise, nobody is happy. So when we convert
diff --git a/compiler/GHC/Core/Coercion.hs b/compiler/GHC/Core/Coercion.hs
index 6f5c1ac3381f233990fd93d3ac5c81edbe91b850..ef6d4af5ecd5f69a4e30183f3d8399494290fe45 100644
--- a/compiler/GHC/Core/Coercion.hs
+++ b/compiler/GHC/Core/Coercion.hs
@@ -843,7 +843,7 @@ mkAppCos :: Coercion
 mkAppCos co1 cos = foldl' mkAppCo co1 cos
 
 {- Note [Unused coercion variable in ForAllCo]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 See Note [Unused coercion variable in ForAllTy] in GHC.Core.TyCo.Rep for the
 motivation for checking coercion variable in types.
 To lift the design choice to (ForAllCo cv kind_co body_co), we have two options:
@@ -2117,7 +2117,7 @@ liftCoSubstTyVar (LC subst env) r v
   = Just $ mkReflCo r (substTyVar subst v)
 
 {- Note [liftCoSubstVarBndr]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~
 callback:
   'liftCoSubstVarBndrUsing' needs to be general enough to work in two
   situations:
diff --git a/compiler/GHC/Core/FamInstEnv.hs b/compiler/GHC/Core/FamInstEnv.hs
index c1715cc270bf00f379791ab8ecc1c409f7706313..c0981ac9e106219a649c212c69812a49f395db18 100644
--- a/compiler/GHC/Core/FamInstEnv.hs
+++ b/compiler/GHC/Core/FamInstEnv.hs
@@ -815,7 +815,7 @@ lookupFamInstEnvConflicts envs fam_inst@(FamInst { fi_axiom = new_axiom })
          if compatibleBranches (coAxiomSingleBranch old_axiom) new_branch
            then Nothing
            else Just noSubst
-      -- Note [Family instance overlap conflicts]
+      -- See Note [Family instance overlap conflicts]
 
     noSubst = panic "lookupFamInstEnvConflicts noSubst"
     new_branch = coAxiomSingleBranch new_axiom
@@ -826,7 +826,6 @@ lookupFamInstEnvConflicts envs fam_inst@(FamInst { fi_axiom = new_axiom })
 
 {- Note [Verifying injectivity annotation]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 Injectivity means that the RHS of a type family uniquely determines the LHS (see
 Note [Type inference for type families with injectivity]).  The user informs us about
 injectivity using an injectivity annotation and it is GHC's task to verify that
diff --git a/compiler/GHC/Core/InstEnv.hs b/compiler/GHC/Core/InstEnv.hs
index 3bb9a32a5077f3483b517c1d5e9d8dc18cbe93a0..ab23fcae2c924baa7145a4a1dafacc5cbf737649 100644
--- a/compiler/GHC/Core/InstEnv.hs
+++ b/compiler/GHC/Core/InstEnv.hs
@@ -869,7 +869,7 @@ lookupInstEnv' ie vis_mods cls tys
           -- apply in the future. This covers an instance like C Int and
           -- a target like [W] C (F a), where F is a type family.
             SurelyApart              -> find ms us        rest
-              -- Note [Infinitary substitution in lookup]
+              -- See Note [Infinitary substitution in lookup]
             MaybeApart MARInfinite _ -> find ms us        rest
             _                        -> find ms (item:us) rest
       where
diff --git a/compiler/GHC/Core/Lint.hs b/compiler/GHC/Core/Lint.hs
index c098afd57c429b3447029616c86932fab1908d0f..ec9b024fc50528bf8589bab91e8041f44f8f79a2 100644
--- a/compiler/GHC/Core/Lint.hs
+++ b/compiler/GHC/Core/Lint.hs
@@ -640,8 +640,8 @@ lintLetBind top_lvl rec_flag binder rhs rhs_ty
            (badBndrTyMsg binder (text "unlifted"))
 
         -- Check that if the binder is at the top level and has type Addr#,
-        -- that it is a string literal, see
-        -- Note [Core top-level string literals].
+        -- that it is a string literal.
+        -- See Note [Core top-level string literals].
        ; checkL (not (isTopLevel top_lvl && binder_ty `eqType` addrPrimTy)
                  || exprIsTickedString rhs)
            (mkTopNonLitStrMsg binder)
@@ -1005,8 +1005,8 @@ lintCoreFun (Var var) nargs
   = lintIdOcc var nargs
 
 lintCoreFun (Lam var body) nargs
-  -- Act like lintCoreExpr of Lam, but *don't* call markAllJoinsBad; see
-  -- Note [Beta redexes]
+  -- Act like lintCoreExpr of Lam, but *don't* call markAllJoinsBad.
+  -- See Note [Beta redexes]
   | nargs /= 0
   = lintLambda var $ lintCoreFun body (nargs - 1)
 
diff --git a/compiler/GHC/Core/Make.hs b/compiler/GHC/Core/Make.hs
index 0ab8a151bc7a4ed2346ea280c7e28637c9771d3d..06cb867c94bfea4b5b96b0170f9e0f4edc7c2134 100644
--- a/compiler/GHC/Core/Make.hs
+++ b/compiler/GHC/Core/Make.hs
@@ -836,7 +836,6 @@ tYPE_ERROR_ID                   = mkRuntimeErrorId typeErrorName
 
 -- Note [aBSENT_SUM_FIELD_ERROR_ID]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- Unboxed sums are transformed into unboxed tuples in GHC.Stg.Unarise.mkUbxSum
 -- and fields that can't be reached are filled with rubbish values. It's easy to
 -- come up with rubbish literal values: we use 0 (ints/words) and 0.0
diff --git a/compiler/GHC/Core/Map/Expr.hs b/compiler/GHC/Core/Map/Expr.hs
index 4c79cd880a2b1bfeabfd73eb90036d5b4e9b360a..60ee2c94b55e607dd49cf4b13f6b22ede98db207 100644
--- a/compiler/GHC/Core/Map/Expr.hs
+++ b/compiler/GHC/Core/Map/Expr.hs
@@ -206,7 +206,6 @@ eqCoreExpr e1 e2 = eqDeBruijnExpr (deBruijnize e1) (deBruijnize e2)
 
 {- Note [Alpha-equality for Coercion arguments]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 The 'Coercion' constructor only appears in argument positions, and so, if the
 functions are equal, then the arguments must have equal types. Because the
 comparison for coercions (correctly) checks only their types, checking for
@@ -215,7 +214,6 @@ alpha-equality of the coercions is redundant.
 
 {- Note [Alpha-equality for let-bindings]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 For /recursive/ let-bindings we need to check that the types of the binders
 are alpha-equivalent. Otherwise
 
diff --git a/compiler/GHC/Core/Multiplicity.hs b/compiler/GHC/Core/Multiplicity.hs
index 2c3828c712a9b35ae9efb86a4b48bc47a02aabdc..b3c268c35606fec57f92912f95d5cd101f46f6aa 100644
--- a/compiler/GHC/Core/Multiplicity.hs
+++ b/compiler/GHC/Core/Multiplicity.hs
@@ -219,7 +219,7 @@ We have
 
 The goal is to maximise reuse of types between linear code and traditional
 code. This is argued at length in the proposal and the article (links in Note
-[Linear Types]).
+[Linear types]).
 
 Note [Polymorphisation of linear fields]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/compiler/GHC/Core/Opt/Arity.hs b/compiler/GHC/Core/Opt/Arity.hs
index 9ff08b142ba12fe72e5afe076cb84f808928fb96..ceef44afbf5195607df6c5f6afea211880bcd90c 100644
--- a/compiler/GHC/Core/Opt/Arity.hs
+++ b/compiler/GHC/Core/Opt/Arity.hs
@@ -127,7 +127,7 @@ exprArity e = go e
                  | otherwise       = go e
     go (Tick t e) | not (tickishIsCode t) = go e
     go (Cast e co)                 = trim_arity (go e) (coercionRKind co)
-                                        -- Note [exprArity invariant]
+                                        -- See Note [exprArity invariant]
     go (App e (Type _))            = go e
     go (App f a) | exprIsTrivial a = (go f - 1) `max` 0
         -- See Note [exprArity for applications]
@@ -155,7 +155,7 @@ typeArity ty
 
       | Just (tc,tys) <- splitTyConApp_maybe ty
       , Just (ty', _) <- instNewTyCon_maybe tc tys
-      , Just rec_nts' <- checkRecTc rec_nts tc  -- See Note [Expanding newtypes]
+      , Just rec_nts' <- checkRecTc rec_nts tc  -- See Note [Expanding newtypes and products]
                                                 -- in GHC.Core.TyCon
 --   , not (isClassTyCon tc)    -- Do not eta-expand through newtype classes
 --                              -- See Note [Newtype classes and eta expansion]
@@ -708,7 +708,7 @@ until it finds a stable arity type. Two wrinkles
   by the 'am_sigs' field in 'FindRhsArity', and 'lookupSigEnv' in the Var case
   of 'arityType'.
 
-Note [Exciting Arity]
+Note [Exciting arity]
 ~~~~~~~~~~~~~~~~~~~~~
 The fixed-point iteration in 'findRhsArity' stabilises very quickly in almost
 all cases. To get notified of cases where we need an usual number of iterations,
@@ -1047,8 +1047,8 @@ arityType env (App fun arg )
         --
 arityType env (Case scrut bndr _ alts)
   | exprIsDeadEnd scrut || null alts
-  = botArityType    -- Do not eta expand. See Note [Dealing with bottom (1)]
-  | not (pedanticBottoms env)  -- See Note [Dealing with bottom (2)]
+  = botArityType    -- Do not eta expand. See (1) in Note [Dealing with bottom]
+  | not (pedanticBottoms env)  -- See (2) in Note [Dealing with bottom]
   , myExprIsCheap env scrut (Just (idType bndr))
   = alts_type
   | exprOkForSpeculation scrut
@@ -1514,7 +1514,7 @@ etaInfoApp in_scope expr eis
         (subst', b') = Core.substBindSC subst b
 
     -- Beta-reduction if possible, pushing any intervening casts past
-    -- the argument. See Note [The EtaInfo mechansim]
+    -- the argument. See Note [The EtaInfo mechanism]
     go subst (Lam v e) (EI (b:bs) mco)
       | Just (arg,mco') <- pushMCoArg mco (varToCoreExpr b)
       = go (Core.extendSubst subst v arg) e (EI bs mco')
diff --git a/compiler/GHC/Core/Opt/CallArity.hs b/compiler/GHC/Core/Opt/CallArity.hs
index c551227486658b07158616ae10c1706963f55c6b..656d6a9fc19d562a84ad1aaa787b205bad93a271 100644
--- a/compiler/GHC/Core/Opt/CallArity.hs
+++ b/compiler/GHC/Core/Opt/CallArity.hs
@@ -99,7 +99,7 @@ The two analysis are not completely independent, as a higher arity can improve
 the information about what variables are being called once or multiple times.
 
 Note [Analysis I: The arity analysis]
-------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 The arity analysis is quite straightforward: The information about an
 expression is an
@@ -115,7 +115,7 @@ minimum (considering Nothing an infinity).
 
 
 Note [Analysis II: The Co-Called analysis]
-------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 The second part is more sophisticated. For reasons explained below, it is not
 sufficient to simply know how often an expression evaluates a variable. Instead
@@ -438,7 +438,7 @@ callArityAnalProgram binds = binds'
   where
     (_, binds') = callArityTopLvl [] emptyVarSet binds
 
--- See Note [Analysing top-level-binds]
+-- See Note [Analysing top-level binds]
 callArityTopLvl :: [Var] -> VarSet -> [CoreBind] -> (CallArityRes, [CoreBind])
 callArityTopLvl exported _ []
     = ( calledMultipleTimes $ (emptyUnVarGraph, mkVarEnv $ [(v, 0) | v <- exported])
diff --git a/compiler/GHC/Core/Opt/ConstantFold.hs b/compiler/GHC/Core/Opt/ConstantFold.hs
index 8910257477851e162fcaeee6e1df39fbf194b9b4..bb44ed4bd556bf954cca16de58438d92a6de3936 100644
--- a/compiler/GHC/Core/Opt/ConstantFold.hs
+++ b/compiler/GHC/Core/Opt/ConstantFold.hs
@@ -1715,7 +1715,9 @@ guardDoubleDiv = do
   [Lit (LitDouble d1), Lit (LitDouble d2)] <- getArgs
   guard $ (d1 /=0 || d2 > 0) -- see Note [negative zero]
        && d2 /= 0            -- avoid NaN and Infinity/-Infinity
--- Note [negative zero] Avoid (0 / -d), otherwise 0/(-1) reduces to
+-- Note [negative zero]
+-- ~~~~~~~~~~~~~~~~~~~~
+-- Avoid (0 / -d), otherwise 0/(-1) reduces to
 -- zero, but we might want to preserve the negative zero here which
 -- is representable in Float/Double but not in (normalised)
 -- Rational. (#3676) Perhaps we should generate (0 :% (-1)) instead?
@@ -1732,14 +1734,12 @@ strengthReduction two_lit add_op = do -- Note [Strength reduction]
 
 -- Note [Strength reduction]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- This rule turns floating point multiplications of the form 2.0 * x and
 -- x * 2.0 into x + x addition, because addition costs less than multiplication.
 -- See #7116
 
 -- Note [What's true and false]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- trueValInt and falseValInt represent true and false values returned by
 -- comparison primops for Char, Int, Word, Integer, Double, Float and Addr.
 -- True is represented as an unboxed 1# literal, while false is represented
@@ -1820,7 +1820,7 @@ tagToEnumRule = do
 
 ------------------------------
 dataToTagRule :: RuleM CoreExpr
--- See Note [dataToTag#] in primops.txt.pp
+-- See Note [dataToTag# magic].
 dataToTagRule = a `mplus` b
   where
     -- dataToTag (tagToEnum x)   ==>   x
@@ -2465,7 +2465,6 @@ match_cstring_length rule_env env _ [lit1]
      in Just (Lit (mkLitInt (roPlatform rule_env) (fromIntegral len)))
 match_cstring_length _ _ _ _ = Nothing
 
----------------------------------------------------
 {- Note [inlineId magic]
 ~~~~~~~~~~~~~~~~~~~~~~~~
 The call 'inline f' arranges that 'f' is inlined, regardless of
@@ -3306,7 +3305,7 @@ Instead, we deal with turning one branch into DEFAULT in GHC.Core.Opt.Simplify.U
 
 Note [caseRules for dataToTag]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-See also Note [dataToTag#] in primops.txt.pp
+See also Note [dataToTag# magic].
 
 We want to transform
   case dataToTag x of
diff --git a/compiler/GHC/Core/Opt/FloatIn.hs b/compiler/GHC/Core/Opt/FloatIn.hs
index 6e4b72431062f1d93bef12bd7a18b58cf8cf7038..37cb23e338bf43cdf9c127fa7b3d8e201100168d 100644
--- a/compiler/GHC/Core/Opt/FloatIn.hs
+++ b/compiler/GHC/Core/Opt/FloatIn.hs
@@ -353,7 +353,7 @@ So: rather than drop \tr{w}'s binding here, we add it onto the list of
 things to drop in the outer let's body, and let nature take its
 course.
 
-Note [extra_fvs (1): avoid floating into RHS]
+Note [extra_fvs (1)]: avoid floating into RHS
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Consider let x=\y....t... in body.  We do not necessarily want to float
 a binding for t into the RHS, because it'll immediately be floated out
@@ -371,7 +371,7 @@ can't have unboxed bindings.
 So we make "extra_fvs" which is the rhs_fvs of such bindings, and
 arrange to dump bindings that bind extra_fvs before the entire let.
 
-Note [extra_fvs (2): free variables of rules]
+Note [extra_fvs (2)]: free variables of rules
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Consider
   let x{rule mentioning y} = rhs in body
@@ -504,7 +504,7 @@ fiBind :: Platform
 
 fiBind platform to_drop (AnnNonRec id ann_rhs@(rhs_fvs, rhs)) body_fvs
   = ( extra_binds ++ shared_binds          -- Land these before
-                                           -- See Note [extra_fvs (1,2)]
+                                           -- See Note [extra_fvs (1)] and Note [extra_fvs (2)]
     , FB (unitDVarSet id) rhs_fvs'         -- The new binding itself
           (FloatLet (NonRec id rhs'))
     , body_binds )                         -- Land these after
@@ -512,12 +512,12 @@ fiBind platform to_drop (AnnNonRec id ann_rhs@(rhs_fvs, rhs)) body_fvs
   where
     body_fvs2 = body_fvs `delDVarSet` id
 
-    rule_fvs = bndrRuleAndUnfoldingVarsDSet id        -- See Note [extra_fvs (2): free variables of rules]
+    rule_fvs = bndrRuleAndUnfoldingVarsDSet id        -- See Note [extra_fvs (2)]
     extra_fvs | noFloatIntoRhs NonRecursive id rhs
               = rule_fvs `unionDVarSet` rhs_fvs
               | otherwise
               = rule_fvs
-        -- See Note [extra_fvs (1): avoid floating into RHS]
+        -- See Note [extra_fvs (1)]
         -- No point in floating in only to float straight out again
         -- We *can't* float into ok-for-speculation unlifted RHSs
         -- But do float into join points
@@ -601,7 +601,7 @@ noFloatIntoArg expr expr_ty
    || all isTyVar (bndr:bndrs)     -- Wrinkle 1 (b)
       -- See Note [noFloatInto considerations] wrinkle 2
 
-  | otherwise  -- Note [noFloatInto considerations] wrinkle 2
+  | otherwise  -- See Note [noFloatInto considerations] wrinkle 2
   = exprIsTrivial deann_expr || exprIsHNF deann_expr
   where
     deann_expr = deAnnotate' expr
@@ -742,7 +742,6 @@ sepBindsByDropPoint platform is_case drop_pts floaters
 
 {- Note [Duplicating floats]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 For case expressions we duplicate the binding if it is reasonably
 small, and if it is not used in all the RHSs This is good for
 situations like
diff --git a/compiler/GHC/Core/Opt/FloatOut.hs b/compiler/GHC/Core/Opt/FloatOut.hs
index fbed53fbf349603b88052a1bda0bf3dd97b795a2..1a88c97d55e08016a7ff0c766b5d35d6dd0238f6 100644
--- a/compiler/GHC/Core/Opt/FloatOut.hs
+++ b/compiler/GHC/Core/Opt/FloatOut.hs
@@ -280,7 +280,7 @@ splitRecFloats fs
                                                    -- non-rec
 
 installUnderLambdas :: Bag FloatBind -> CoreExpr -> CoreExpr
--- Note [Floating out of Rec rhss]
+-- See Note [Floating out of Rec rhss]
 installUnderLambdas floats e
   | isEmptyBag floats = e
   | otherwise         = go e
@@ -374,7 +374,6 @@ floatBody lvl arg       -- Used rec rhss, and case-alternative rhss
 
 {- Note [Floating past breakpoints]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 We used to disallow floating out of breakpoint ticks (see #10052). However, I
 think this is too restrictive.
 
@@ -428,7 +427,7 @@ floatExpr (Tick tickish expr)
     in
     (fs, annotated_defns, Tick tickish expr') }
 
-  -- Note [Floating past breakpoints]
+  -- See Note [Floating past breakpoints]
   | Breakpoint{} <- tickish
   = case (floatExpr expr)    of { (fs, floating_defns, expr') ->
     (fs, floating_defns, Tick tickish expr') }
diff --git a/compiler/GHC/Core/Opt/LiberateCase.hs b/compiler/GHC/Core/Opt/LiberateCase.hs
index 3c9eb5c3d0f9d3ac21fccfc2ae0db5ca737e9954..1598526ada0f4145498a959b0f7595bccb96370e 100644
--- a/compiler/GHC/Core/Opt/LiberateCase.hs
+++ b/compiler/GHC/Core/Opt/LiberateCase.hs
@@ -168,7 +168,7 @@ libCaseBind env (Rec pairs)
 
     ok_pair (id,_)
         =  idArity id > 0       -- Note [Only functions!]
-        && not (isDeadEndId id) -- Note [Not bottoming ids]
+        && not (isDeadEndId id) -- Note [Not bottoming Ids]
 
 {- Note [Not bottoming Ids]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/compiler/GHC/Core/Opt/Simplify.hs b/compiler/GHC/Core/Opt/Simplify.hs
index 4f5ece8fcac76a984a77aa5fef5301ea7abd0797..575512b7c8fb76e4179980fad9555ba9664be308 100644
--- a/compiler/GHC/Core/Opt/Simplify.hs
+++ b/compiler/GHC/Core/Opt/Simplify.hs
@@ -1393,6 +1393,7 @@ simplTick env tickish expr cont
   getDoneId other = pprPanic "getDoneId" (ppr other)
 
 -- Note [case-of-scc-of-case]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- It's pretty important to be able to transform case-of-case when
 -- there's an SCC in the way.  For example, the following comes up
 -- in nofib/real/compress/Encode.hs:
@@ -1992,6 +1993,7 @@ simplIdF env var cont
               cont' = trimJoinCont var mb_join cont
           in simplExprF env' e cont'
               -- Note [zapSubstEnv]
+              -- ~~~~~~~~~~~~~~~~~~
               -- The template is already simplified, so don't re-substitute.
               -- This is VITAL.  Consider
               --      let x = e in
diff --git a/compiler/GHC/Core/Opt/Simplify/Utils.hs b/compiler/GHC/Core/Opt/Simplify/Utils.hs
index e288646d74ca2accbf9e13dc15ca73ea33b2e9c0..409f3176eb1556509a2b844bc55dfeb83197916f 100644
--- a/compiler/GHC/Core/Opt/Simplify/Utils.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Utils.hs
@@ -1293,7 +1293,7 @@ preInlineUnconditionally env top_lvl bndr rhs rhs_env
   | not (one_occ (idOccInfo bndr))           = Nothing
   | not (isStableUnfolding unf)              = Just $! (extend_subst_with rhs)
 
-  -- Note [Stable unfoldings and preInlineUnconditionally]
+  -- See Note [Stable unfoldings and preInlineUnconditionally]
   | not (isInlinePragma inline_prag)
   , Just inl <- maybeUnfoldingTemplate unf   = Just $! (extend_subst_with inl)
   | otherwise                                = Nothing
@@ -1395,7 +1395,7 @@ our new view that inlining is like a RULE, so I'm sticking to the 'active'
 story for now.
 
 NB: unconditional inlining of this sort can introduce ticks in places that
-may seem surprising; for instance, the LHS of rules. See Note [Simplfying
+may seem surprising; for instance, the LHS of rules. See Note [Simplifying
 rules] for details.
 -}
 
@@ -2196,7 +2196,7 @@ prepareAlts scrut case_bndr' alts
 
 mkCase tries these things
 
-* Note [Nerge nested cases]
+* Note [Merge nested cases]
 * Note [Eliminate identity case]
 * Note [Scrutinee constant folding]
 
diff --git a/compiler/GHC/Core/Opt/SpecConstr.hs b/compiler/GHC/Core/Opt/SpecConstr.hs
index 9c4c52107afa746d1d15ec0c7f53e715b93fa839..afd8afc5ea9c4db72961b4f5c65f09b95ff5661b 100644
--- a/compiler/GHC/Core/Opt/SpecConstr.hs
+++ b/compiler/GHC/Core/Opt/SpecConstr.hs
@@ -1181,8 +1181,8 @@ data ArgOcc = NoOcc     -- Doesn't occur at all; or a type argument
             | ScrutOcc  -- See Note [ScrutOcc]
                  (DataConEnv [ArgOcc])   -- How the sub-components are used
 
-{- Note  [ScrutOcc]
-~~~~~~~~~~~~~~~~~~~
+{- Note [ScrutOcc]
+~~~~~~~~~~~~~~~~~~
 An occurrence of ScrutOcc indicates that the thing, or a `cast` version of the thing,
 is *only* taken apart or applied.
 
@@ -1316,7 +1316,7 @@ scExpr' env (Let (NonRec bndr rhs) body)
         ; rhs_info  <- scRecRhs env (bndr',rhs)
 
         ; let body_env2 = extendHowBound body_env [bndr'] RecFun
-                           -- Note [Local let bindings]
+                           -- See Note [Local let bindings]
               rhs'      = ri_new_rhs rhs_info
               body_env3 = extendValEnv body_env2 bndr' (isValue (sc_vals env) rhs')
 
diff --git a/compiler/GHC/Core/Opt/Specialise.hs b/compiler/GHC/Core/Opt/Specialise.hs
index 3c2d10823d9352b2019640d7d8e6b610b823f90d..25e4859300469443847e2daa34fd3671d5f497ca 100644
--- a/compiler/GHC/Core/Opt/Specialise.hs
+++ b/compiler/GHC/Core/Opt/Specialise.hs
@@ -809,7 +809,7 @@ canSpecImport dflags fn
 tryWarnMissingSpecs :: DynFlags -> [Id] -> Id -> [CallInfo] -> CoreM ()
 -- See Note [Warning about missed specialisations]
 tryWarnMissingSpecs dflags callers fn calls_for_fn
-  | isClassOpId fn = return () -- See Note [Missed specialization for ClassOps]
+  | isClassOpId fn = return () -- See Note [Missed specialisation for ClassOps]
   | wopt Opt_WarnMissedSpecs dflags
     && not (null callers)
     && allCallersInlined                  = doWarn $ WarningWithFlag Opt_WarnMissedSpecs
@@ -1434,7 +1434,7 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs
         -- See Note [Auto-specialisation and RULES]
 
 --   && not (certainlyWillInline (idUnfolding fn))      -- And it's not small
---      See Note [Inline specialisation] for why we do not
+--      See Note [Inline specialisations] for why we do not
 --      switch off specialisation for inline functions
 
   = -- pprTrace "specDefn: some" (ppr fn $$ ppr calls_for_me $$ ppr existing_rules) $
diff --git a/compiler/GHC/Core/Opt/WorkWrap.hs b/compiler/GHC/Core/Opt/WorkWrap.hs
index 5b31f76ed140f13371dc8cb993cf463e176f41f1..6180a69ab86bbb145991a70a2c20a7197720a13a 100644
--- a/compiler/GHC/Core/Opt/WorkWrap.hs
+++ b/compiler/GHC/Core/Opt/WorkWrap.hs
@@ -821,7 +821,7 @@ mkWWBindPair ww_opts fn_id fn_info fn_args fn_body work_uniq div
 
     work_arity = length work_demands
 
-    -- See Note [Demand on the Worker]
+    -- See Note [Demand on the worker]
     single_call = saturatedByOneShots arity (demandInfo fn_info)
     worker_demand | single_call = mkWorkerDemand work_arity
                   | otherwise   = topDmd
@@ -845,7 +845,7 @@ mkStrWrapperInlinePrag :: InlinePragma -> [CoreRule] -> InlinePragma
 -- See Note [Wrapper activation]
 mkStrWrapperInlinePrag (InlinePragma { inl_act = act, inl_rule = rule_info }) rules
   = InlinePragma { inl_src    = SourceText "{-# INLINE"
-                 , inl_inline = NoUserInlinePrag -- See Note [Wrapper NoUserInline]
+                 , inl_inline = NoUserInlinePrag -- See Note [Wrapper NoUserInlinePrag]
                  , inl_sat    = Nothing
                  , inl_act    = activeAfter wrapper_phase
                  , inl_rule   = rule_info }  -- RuleMatchInfo is (and must be) unaffected
diff --git a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs
index 698a85988ac34349f81ce6b13f8e041998641f90..86e57286c13f0cf31cf5c2f4d350847cee9f590e 100644
--- a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs
+++ b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs
@@ -576,7 +576,7 @@ wantToUnboxArg fam_envs ty (n :* sd)
   | Just (tc, tc_args, co) <- normSplitTyConApp_maybe fam_envs ty
   , Just dc <- tyConSingleAlgDataCon_maybe tc
   , let arity = dataConRepArity dc
-  , Just (Unboxed, ds) <- viewProd arity sd -- See Note [Boxity Analysis]
+  , Just (Unboxed, ds) <- viewProd arity sd -- See Note [Boxity analysis]
   -- NB: No strictness or evaluatedness checks here. That is done by
   -- 'finaliseBoxity'!
   = Unbox (DataConPatContext dc tc_args co) ds
diff --git a/compiler/GHC/Core/TyCo/Subst.hs b/compiler/GHC/Core/TyCo/Subst.hs
index ec77cd26711328beb49e949bc71cf5c1ba2a6d7a..32817ca6c7eade997a7abd44f386c6c21c72ea95 100644
--- a/compiler/GHC/Core/TyCo/Subst.hs
+++ b/compiler/GHC/Core/TyCo/Subst.hs
@@ -179,8 +179,8 @@ variations happen to; for example [a -> (a, b)].
 A TCvSubst is not idempotent, but, unlike the non-idempotent substitution
 we use during unifications, it must not be repeatedly applied.
 
-Note [Extending the TvSubstEnv]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Note [Extending the TCvSubstEnv]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 See #tcvsubst_invariant# for the invariants that must hold.
 
 This invariant allows a short-cut when the subst envs are empty:
diff --git a/compiler/GHC/Core/TyCon.hs b/compiler/GHC/Core/TyCon.hs
index 7c4ad2dfcf8cc7cedac83fa21276ca886f5cb83e..df8bf09fa0dc72f386f4ade6a949645c3bf344f4 100644
--- a/compiler/GHC/Core/TyCon.hs
+++ b/compiler/GHC/Core/TyCon.hs
@@ -442,7 +442,7 @@ See #19367.
 type TyConBinder     = VarBndr TyVar   TyConBndrVis
 type TyConTyCoBinder = VarBndr TyCoVar TyConBndrVis
      -- Only PromotedDataCon has TyConTyCoBinders
-     -- See Note [Promoted GADT data construtors]
+     -- See Note [Promoted GADT data constructors]
 
 data TyConBndrVis
   = NamedTCB ArgFlag
@@ -1712,7 +1712,7 @@ isGcPtrRep UnliftedRep = True
 isGcPtrRep _           = False
 
 -- A PrimRep is compatible with another iff one can be coerced to the other.
--- See Note [bad unsafe coercion] in GHC.Core.Lint for when are two types coercible.
+-- See Note [Bad unsafe coercion] in GHC.Core.Lint for when two types are coercible.
 primRepCompatible :: Platform -> PrimRep -> PrimRep -> Bool
 primRepCompatible platform rep1 rep2 =
     (isUnboxed rep1 == isUnboxed rep2) &&
@@ -2436,7 +2436,7 @@ setTcTyConKind :: TyCon -> Kind -> TyCon
 -- Update the Kind of a TcTyCon
 -- The new kind is always a zonked version of its previous
 -- kind, so we don't need to update any other fields.
--- See Note [The Purely Kinded Invariant] in GHC.Tc.Gen.HsType
+-- See Note [The Purely Kinded Type Invariant (PKTI)] in GHC.Tc.Gen.HsType
 setTcTyConKind tc@(TcTyCon {}) kind = let tc' = tc { tyConKind = kind
                                                    , tyConNullaryTy = mkNakedTyConTy tc'
                                                        -- see Note [Sharing nullary TyCons]
diff --git a/compiler/GHC/Core/Unfold.hs b/compiler/GHC/Core/Unfold.hs
index 08c5a10b30e29f0d6b9eb67e2323fd69ff3a97f8..c3411079574c0adcc0b0a2ed042f402c687414c8 100644
--- a/compiler/GHC/Core/Unfold.hs
+++ b/compiler/GHC/Core/Unfold.hs
@@ -1246,8 +1246,12 @@ tryUnfolding logger opts !case_depth id lone_variable
 
 
 {-
-Note [Unfold into lazy contexts], Note [RHS of lets]
+Note [Unfold into lazy contexts]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Merged into Note [RHS of lets].
+
+Note [RHS of lets]
+~~~~~~~~~~~~~~~~~~
 When the call is the argument of a function with a RULE, or the RHS of a let,
 we are a little bit keener to inline.  For example
      f y = (y,y,y)
diff --git a/compiler/GHC/Core/Utils.hs b/compiler/GHC/Core/Utils.hs
index 244f5f4b42e1672aa6ced8cfed7bff493f782dca..24ab87ac06b4807b137ee93298dce1ffc0e073dc 100644
--- a/compiler/GHC/Core/Utils.hs
+++ b/compiler/GHC/Core/Utils.hs
@@ -1218,12 +1218,12 @@ there is only dictionary selection (no construction) involved
 Note [exprIsCheap]
 ~~~~~~~~~~~~~~~~~~
 
-See also Note [Interaction of exprIsCheap and lone variables] in GHC.Core.Unfold
+See also Note [Interaction of exprIsWorkFree and lone variables] in GHC.Core.Unfold
 
 @exprIsCheap@ looks at a Core expression and returns \tr{True} if
 it is obviously in weak head normal form, or is cheap to get to WHNF.
-[Note that that's not the same as exprIsDupable; an expression might be
-big, and hence not dupable, but still cheap.]
+Note that that's not the same as exprIsDupable; an expression might be
+big, and hence not dupable, but still cheap.
 
 By ``cheap'' we mean a computation we're willing to:
         push inside a lambda, or
@@ -1951,7 +1951,7 @@ exprIsHNFlike is_con is_con_unf = is_hnf_like
 
 {-
 Note [exprIsHNF Tick]
-
+~~~~~~~~~~~~~~~~~~~~~
 We can discard source annotations on HNFs as long as they aren't
 tick-like:
 
@@ -2338,7 +2338,9 @@ There are some particularly delicate points here:
   The above is correct, but eta-reducing g would yield g=f, the linter will
   complain that g and f don't have the same type.
 
-* Note [Arity care]: we need to be careful if we just look at f's
+* Note [Arity care]
+  ~~~~~~~~~~~~~~~~~
+  We need to be careful if we just look at f's
   arity. Currently (Dec07), f's arity is visible in its own RHS (see
   Note [Arity robustness] in GHC.Core.Opt.Simplify.Env) so we must *not* trust the
   arity when checking that 'f' is a value.  Otherwise we will
diff --git a/compiler/GHC/CoreToStg/Prep.hs b/compiler/GHC/CoreToStg/Prep.hs
index afff96e6edb5017afda9c905c74874cdf8757c4f..99c4cd4e8c593fffc2d516e98af5ad5ec3034e94 100644
--- a/compiler/GHC/CoreToStg/Prep.hs
+++ b/compiler/GHC/CoreToStg/Prep.hs
@@ -82,9 +82,8 @@ import Data.Functor.Identity
 import Control.Monad
 
 {-
--- ---------------------------------------------------------------------------
--- Note [CorePrep Overview]
--- ---------------------------------------------------------------------------
+Note [CorePrep Overview]
+~~~~~~~~~~~~~~~~~~~~~~~~
 
 The goal of this pass is to prepare for code generation.
 
@@ -2173,7 +2172,7 @@ newVar ty
 -- ---------------------------------------------------------------------------
 --
 -- Note [Floating Ticks in CorePrep]
---
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- It might seem counter-intuitive to float ticks by default, given
 -- that we don't actually want to move them if we can help it. On the
 -- other hand, nothing gets very far in CorePrep anyway, and we want
diff --git a/compiler/GHC/Data/FastString.hs b/compiler/GHC/Data/FastString.hs
index 1b2f21c4154c3c18a0273b713783bf31f2a63036..1d9e419418bc5ba25c4646070f1a51f2d7be5fe1 100644
--- a/compiler/GHC/Data/FastString.hs
+++ b/compiler/GHC/Data/FastString.hs
@@ -430,7 +430,7 @@ mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
   where ptr = Ptr a#
 
 {- Note [Updating the FastString table]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We use a concurrent hashtable which contains multiple segments, each hash value
 always maps to the same segment. Read is lock-free, write to the a segment
 should acquire a lock for that segment to avoid race condition, writes to
diff --git a/compiler/GHC/Driver/CmdLine.hs b/compiler/GHC/Driver/CmdLine.hs
index b1226ecdca3ca2d659b76b7f3f2c692c5410c0f7..539f27c53ed5b7f459e2a852d9d02a2434ce17c1 100644
--- a/compiler/GHC/Driver/CmdLine.hs
+++ b/compiler/GHC/Driver/CmdLine.hs
@@ -320,15 +320,15 @@ missingArgErr f = Left ("missing argument for flag: " ++ f)
 --------------------------------------------------------
 
 
--- See Note [Handling errors when parsing flags]
+-- See Note [Handling errors when parsing command-line flags]
 errorsToGhcException :: [(String,    -- Location
                           String)]   -- Error
                      -> GhcException
 errorsToGhcException errs =
     UsageError $ intercalate "\n" $ [ l ++ ": " ++ e | (l, e) <- errs ]
 
-{- Note [Handling errors when parsing commandline flags]
-
+{- Note [Handling errors when parsing command-line flags]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Parsing of static and mode flags happens before any session is started, i.e.,
 before the first call to 'GHC.withGhc'. Therefore, to report errors for
 invalid usage of these two types of flags, we can not call any function that
diff --git a/compiler/GHC/Driver/Flags.hs b/compiler/GHC/Driver/Flags.hs
index df833b03d184ae9ce3d6a1384874ba7d8f217e0f..b8a82656e8aa08c913f7483ad7df97f8377f3d01 100644
--- a/compiler/GHC/Driver/Flags.hs
+++ b/compiler/GHC/Driver/Flags.hs
@@ -320,7 +320,7 @@ data GeneralFlag
    | Opt_ShowHoleConstraints
     -- Options relating to the display of valid hole fits
     -- when generating an error message for a typed hole
-    -- See Note [Valid hole fits include] in GHC.Tc.Errors.Hole
+    -- See Note [Valid hole fits include ...] in GHC.Tc.Errors.Hole
    | Opt_ShowValidHoleFits
    | Opt_SortValidHoleFits
    | Opt_SortBySizeHoleFits
diff --git a/compiler/GHC/Driver/Main.hs b/compiler/GHC/Driver/Main.hs
index 5c088cc95974859b6cbcc7580d994a93b4789477..3e48771ace1a89081a622371953160ebff7a8d9c 100644
--- a/compiler/GHC/Driver/Main.hs
+++ b/compiler/GHC/Driver/Main.hs
@@ -839,8 +839,7 @@ initModDetails hsc_env mod_summary iface =
 
 {-
 Note [ModDetails and --make mode]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 An interface file consists of two parts
 
 * The `ModIface` which ends up getting written to disk.
@@ -957,7 +956,6 @@ hscDesugarAndSimplify summary (FrontendTypecheck tc_result) tc_warnings mb_old_h
 {-
 Note [Writing interface files]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 We write one interface file per module and per compilation, except with
 -dynamic-too where we write two interface files (non-dynamic and dynamic).
 
@@ -1762,7 +1760,6 @@ hscCompileCmmFile hsc_env filename output_filename = runHsc hsc_env $ do
 {-
 Note [Forcing of stg_binds]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 The two last steps in the STG pipeline are:
 
 * Sorting the bindings in dependency order.
@@ -2073,7 +2070,6 @@ hscAddSptEntries hsc_env mnwib entries = do
 {-
   Note [Fixity declarations in GHCi]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
   To support fixity declarations on types defined within GHCi (as requested
   in #10018) we record the fixity environment in InteractiveContext.
   When we want to evaluate something GHC.Tc.Module.runTcInteractive pulls out this
diff --git a/compiler/GHC/Driver/Make.hs b/compiler/GHC/Driver/Make.hs
index fb823e842f55c32f06e98f3ae8640dbee6fee366..4ec6d133488290dd08c586d15bb2c82156b9ebc1 100644
--- a/compiler/GHC/Driver/Make.hs
+++ b/compiler/GHC/Driver/Make.hs
@@ -447,7 +447,7 @@ loadWithCache cache how_much = do
       else throwErrors (fmap GhcDriverMessage errs)
 
 -- Note [Unused packages]
---
+-- ~~~~~~~~~~~~~~~~~~~~~~
 -- Cabal passes `--package-id` flag for each direct dependency. But GHC
 -- loads them lazily, so when compilation is done, we have a list of all
 -- actually loaded packages. All the packages, specified on command line,
@@ -846,7 +846,6 @@ a pair of an `IO a` action and a `MVar a`, where to place the result.
 
 Note [--make mode]
 ~~~~~~~~~~~~~~~~~
-
 There are two main parts to `--make` mode.
 
 1. `downsweep`: Starts from the top of the module graph and computes dependencies.
@@ -857,7 +856,6 @@ computers how to build this ModuleGraph.
 
 Note [Upsweep]
 ~~~~~~~~~~~~~~
-
 Upsweep takes a 'ModuleGraph' as input, computes a build plan and then executes
 the plan in order to compile the project.
 
@@ -2638,7 +2636,7 @@ waitMakeAction :: MakeAction -> IO ()
 waitMakeAction (MakeAction _ mvar) = () <$ readMVar mvar
 
 {- Note [GHC Heap Invariants]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~
 This note is a general place to explain some of the heap invariants which should
 hold for a program compiled with --make mode. These invariants are all things
 which can be checked easily using ghc-debug.
diff --git a/compiler/GHC/Driver/Pipeline.hs b/compiler/GHC/Driver/Pipeline.hs
index 3aaf9f298e56a9668c1d0b3da025d9d6c38d297a..56e188395e33427223fdd7a205e2157b04bbcb60 100644
--- a/compiler/GHC/Driver/Pipeline.hs
+++ b/compiler/GHC/Driver/Pipeline.hs
@@ -303,7 +303,7 @@ compileOne' mHscMessage
          = (Interpreter, gopt_set (dflags2 { backend = Interpreter }) Opt_ForceRecomp)
          | otherwise
          = (backend dflags, dflags2)
-      -- Note [Filepaths and Multiple Home Units]
+       -- See Note [Filepaths and Multiple Home Units]
        dflags  = dflags3 { includePaths = offsetIncludePaths dflags3 $ addImplicitQuoteInclude old_paths [current_dir] }
        upd_summary = summary { ms_hspp_opts = dflags }
        hsc_env = hscSetFlags dflags hsc_env0
@@ -314,7 +314,6 @@ compileOne' mHscMessage
 --
 -- Note [Dynamic linking on macOS]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- Since macOS Sierra (10.14), the dynamic system linker enforces
 -- a limit on the Load Commands.  Specifically the Load Command Size
 -- Limit is at 32K (32768).  The Load Commands contain the install
@@ -906,10 +905,8 @@ pipelineStart pipe_env hsc_env input_fn =
    fromSuffix _          = return (Just input_fn)
 
 {-
-
 Note [The Pipeline Monad]
 ~~~~~~~~~~~~~~~~~~~~~~~~~
-
 The pipeline is represented as a free monad by the `TPipelineClass` type synonym,
 which stipulates the general monadic interface for the pipeline and `MonadUse`, instantiated
 to `TPhase`, which indicates the actions available in the pipeline.
diff --git a/compiler/GHC/Driver/Pipeline/Execute.hs b/compiler/GHC/Driver/Pipeline/Execute.hs
index 970c00eab2bcc06ba70aa2e1f9e488973e6cbd68..6bc9df7c6fefac2e8aec82c34ba73982800d8fca 100644
--- a/compiler/GHC/Driver/Pipeline/Execute.hs
+++ b/compiler/GHC/Driver/Pipeline/Execute.hs
@@ -171,7 +171,7 @@ runMergeForeign _pipe_env hsc_env input_fn foreign_os = do
 runLlvmLlcPhase :: PipeEnv -> HscEnv -> FilePath -> IO FilePath
 runLlvmLlcPhase pipe_env hsc_env input_fn = do
     -- Note [Clamping of llc optimizations]
-    --
+    -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     -- See #13724
     --
     -- we clamp the llc optimization between [1,2]. This is because passing -O0
@@ -1077,7 +1077,6 @@ compileStub hsc_env stub_c = compileForeign hsc_env LangC stub_c
 {-
 Note [Produce big objects on Windows]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 The Windows Portable Executable object format has a limit of 32k sections, which
 we tend to blow through pretty easily. Thankfully, there is a "big object"
 extension, which raises this limit to 2^32. However, it must be explicitly
@@ -1272,7 +1271,7 @@ getGhcVersionPathName dflags unit_env = do
 --    +                       3c: R_SPARC_HI22        _GLOBAL_OFFSET_TABLE_-0x8
 
 {- Note [Don't normalise input filenames]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Summary
   We used to normalise input filenames when starting the unlit phase. This
   broke hpc in `--make` mode with imported literate modules (#2991).
diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs
index 0c4503a085cfbe511d292ecae8bfbe4e39066704..838f0bf3b7824774e31607a358374e3fa46ef264 100644
--- a/compiler/GHC/Driver/Session.hs
+++ b/compiler/GHC/Driver/Session.hs
@@ -1883,7 +1883,7 @@ parseDynamicFlagsFull activeFlags cmdline dflags0 args = do
   let ((leftover, errs, warns), dflags1)
           = runCmdLine (processArgs activeFlags args) dflags0
 
-  -- See Note [Handling errors when parsing commandline flags]
+  -- See Note [Handling errors when parsing command-line flags]
   let rdr = renderWithContext (initSDocContext dflags0 defaultUserStyle)
   unless (null errs) $ liftIO $ throwGhcExceptionIO $ errorsToGhcException $
     map ((rdr . ppr . getLoc &&& unLoc) . errMsg) $ errs
@@ -4586,8 +4586,8 @@ pieCCLDOpts dflags
 
 
 {-
-Note [No PIE while linking]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Note [No PIE when linking]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 As of 2016 some Linux distributions (e.g. Debian) have started enabling -pie by
 default in their gcc builds. This is incompatible with -r as it implies that we
 are producing an executable. Consequently, we must manually pass -no-pie to gcc
diff --git a/compiler/GHC/HsToCore/Coverage.hs b/compiler/GHC/HsToCore/Coverage.hs
index 2e45539fba817743fe23a5dae0689159e206b2f4..20086620e5659b51cc008a50f14a8db6f4f5ab98 100644
--- a/compiler/GHC/HsToCore/Coverage.hs
+++ b/compiler/GHC/HsToCore/Coverage.hs
@@ -400,7 +400,7 @@ bindTick density name pos fvs = do
 
 
 -- Note [inline sccs]
---
+-- ~~~~~~~~~~~~~~~~~~
 -- The reason not to add ticks to INLINE functions is that this is
 -- sometimes handy for avoiding adding a tick to a particular function
 -- (see #6131)
@@ -1080,6 +1080,7 @@ noFVs :: FreeVars
 noFVs = emptyOccEnv
 
 -- Note [freevars]
+-- ~~~~~~~~~~~~~~~
 --   For breakpoints we want to collect the free variables of an
 --   expression for pinning on the HsTick.  We don't want to collect
 --   *all* free variables though: in particular there's no point pinning
diff --git a/compiler/GHC/HsToCore/Match.hs b/compiler/GHC/HsToCore/Match.hs
index 28391fa815e5b2abe3a489554fb7239476f9a17f..7719e141924daf9e82d76c4cc3a16651963a7291 100644
--- a/compiler/GHC/HsToCore/Match.hs
+++ b/compiler/GHC/HsToCore/Match.hs
@@ -254,7 +254,7 @@ match (v:vs) ty eqns    -- Eqns *can* be empty
           maybeWarn $ filter (not . null) gs
 
 matchEmpty :: MatchId -> Type -> DsM (NonEmpty (MatchResult CoreExpr))
--- See Note [Empty case expressions]
+-- See Note [Empty case alternatives]
 matchEmpty var res_ty
   = return [MR_Fallible mk_seq]
   where
@@ -343,7 +343,7 @@ We do *not* desugar simply to
    error "empty case"
 or some such, because 'x' might be bound to (error "hello"), in which
 case we want to see that "hello" exception, not (error "empty case").
-See also Note [Case elimination: lifted case] in GHC.Core.Opt.Simplify.
+See also the "lifted case" discussion in Note [Case elimination] in GHC.Core.Opt.Simplify.
 
 
 ************************************************************************
diff --git a/compiler/GHC/HsToCore/Pmc/Solver.hs b/compiler/GHC/HsToCore/Pmc/Solver.hs
index 7623c6e710fc3337f77fffe9e52416bfde847a14..b19ce0c4757dfd7660678e6dd369f5bb8a4f4285 100644
--- a/compiler/GHC/HsToCore/Pmc/Solver.hs
+++ b/compiler/GHC/HsToCore/Pmc/Solver.hs
@@ -1004,7 +1004,7 @@ The term oracle state is never obviously (i.e., without consulting the type
 oracle or doing inhabitation testing) contradictory. This implies a few
 invariants:
 * Whenever vi_pos overlaps with vi_neg according to 'eqPmAltCon', we refute.
-  This is implied by the Note [Pos/Neg invariant].
+  This is implied by the Note [The Pos/Neg invariant].
 * Whenever vi_neg subsumes a COMPLETE set, we refute. We consult vi_rcm to
   detect this, but we could just compare whole COMPLETE sets to vi_neg every
   time, if it weren't for performance.
@@ -1496,7 +1496,7 @@ matchConLikeResTy _   (TySt _ inert) ty (PatSynCon ps) = {-# SCC "matchConLikeRe
         then pure subst
         else mzero
 
-{- Note [Soundness and completeness]
+{- Note [Soundness and Completeness]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Soundness and completeness of the pattern-match checker depend entirely on the
 soundness and completeness of the inhabitation test.
diff --git a/compiler/GHC/HsToCore/Types.hs b/compiler/GHC/HsToCore/Types.hs
index bc9d7b4c1d321ca5483d2aa1c7f2ea55d6cc7eaf..e147758260dd5711a9b3bb7415d392e2cb318ffe 100644
--- a/compiler/GHC/HsToCore/Types.hs
+++ b/compiler/GHC/HsToCore/Types.hs
@@ -70,7 +70,7 @@ data DsLclEnv
   { dsl_meta    :: DsMetaEnv   -- ^ Template Haskell bindings
   , dsl_loc     :: RealSrcSpan -- ^ To put in pattern-matching error msgs
   , dsl_nablas  :: Nablas
-  -- ^ See Note [Note [Long-distance information] in "GHC.HsToCore.Pmc".
+  -- ^ See Note [Long-distance information] in "GHC.HsToCore.Pmc".
   -- The set of reaching values Nablas is augmented as we walk inwards, refined
   -- through each pattern match in turn
   }
diff --git a/compiler/GHC/Iface/Ext/Ast.hs b/compiler/GHC/Iface/Ext/Ast.hs
index 83eb475a78865a0b4c24e973e2a7af651d9d470f..cc694f249f416406183e99cbd75439be0a5e4281 100644
--- a/compiler/GHC/Iface/Ext/Ast.hs
+++ b/compiler/GHC/Iface/Ext/Ast.hs
@@ -78,7 +78,7 @@ import Control.Monad.Trans.Class  ( lift )
 import Control.Applicative        ( (<|>) )
 
 {- Note [Updating HieAst for changes in the GHC AST]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 When updating the code in this file for changes in the GHC AST, you
 need to pay attention to the following things:
 
@@ -210,6 +210,7 @@ type TypecheckedSource = LHsBinds GhcTc
 
 
 {- Note [Name Remapping]
+   ~~~~~~~~~~~~~~~~~~~~~
 The Typechecker introduces new names for mono names in AbsBinds.
 We don't care about the distinction between mono and poly bindings,
 so we replace all occurrences of the mono name with the poly name.
@@ -415,6 +416,7 @@ concatM :: Monad m => [m [a]] -> m [a]
 concatM xs = concat <$> sequence xs
 
 {- Note [Capturing Scopes and other non local information]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 toHie is a local transformation, but scopes of bindings cannot be known locally,
 hence we have to push the relevant info down into the binding nodes.
 We use the following types (*Context and *Scoped) to wrap things and
@@ -459,6 +461,7 @@ data PScoped a = PS (Maybe Span)
   deriving (Typeable, Data) -- Pattern Scope
 
 {- Note [TyVar Scopes]
+   ~~~~~~~~~~~~~~~~~~~
 Due to -XScopedTypeVariables, type variables can be in scope quite far from
 their original binding. We resolve the scope of these type variables
 in a separate pass
@@ -512,6 +515,7 @@ tvScopes tvScope rhsScope xs =
   map (\(RS sc a)-> TVS tvScope sc a) $ listScopes rhsScope xs
 
 {- Note [Scoping Rules for SigPat]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Explicitly quantified variables in pattern type signatures are not
 brought into scope in the rhs, but implicitly quantified variables
 are (HsWC and HsIB).
diff --git a/compiler/GHC/Iface/Recomp.hs b/compiler/GHC/Iface/Recomp.hs
index fc12701b614ffe8b757c3cc78c9fd7370a602bb8..2c1943074c405eeb542b0d0d258e18cbb5280552 100644
--- a/compiler/GHC/Iface/Recomp.hs
+++ b/compiler/GHC/Iface/Recomp.hs
@@ -1474,6 +1474,7 @@ declExtras fix_fn ann_fn rule_env inst_env fi_env dm_env decl
 
 
 {- Note [default method Name] (see also #15970)
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 The Names for the default methods aren't available in Iface syntax.
 
diff --git a/compiler/GHC/Iface/Recomp/Flags.hs b/compiler/GHC/Iface/Recomp/Flags.hs
index ace07c5977553932d13eea2bfb23bc82fe042c2c..90f3afebbc759a93981d124244fac85f85854a80 100644
--- a/compiler/GHC/Iface/Recomp/Flags.hs
+++ b/compiler/GHC/Iface/Recomp/Flags.hs
@@ -109,7 +109,7 @@ fingerprintHpcFlags dflags@DynFlags{..} nameio =
 
 
 {- Note [path flags and recompilation]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 There are several flags that we deliberately omit from the
 recompilation check; here we explain why.
 
@@ -140,7 +140,6 @@ The only path-related flag left is -hcsuf.
 
 {- Note [Ignoring some flag changes]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 Normally, --make tries to reuse only compilation products that are
 the same as those that would have been produced compiling from
 scratch. Sometimes, however, users would like to be more aggressive
@@ -159,7 +158,6 @@ options out of the flag hash, hashing them separately.
 
 {- Note [Repeated -optP hashing]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 We invoke fingerprintDynFlags for each compiled module to include
 the hash of relevant DynFlags in the resulting interface file.
 -optP (preprocessor) flags are part of that hash.
diff --git a/compiler/GHC/Linker/Loader.hs b/compiler/GHC/Linker/Loader.hs
index a3a2059f07c1fb9f9c3bf7c4c9968443ac3d54ee..046ec5ffd7193901162a64bd43a3bf61b6b304dd 100644
--- a/compiler/GHC/Linker/Loader.hs
+++ b/compiler/GHC/Linker/Loader.hs
@@ -444,7 +444,7 @@ mergeStaticObjects specs = go [] specs
     go [] [] = []
 
 {- Note [preload packages]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~
 Why do we need to preload packages from the command line?  This is an
 explanation copied from #2437:
 
diff --git a/compiler/GHC/Rename/Env.hs b/compiler/GHC/Rename/Env.hs
index a3c126222f4d6edcc03d7ca961c1a5f9b2bfa417..e19697bb40ffbab6b29d22c0c4505fa4347b3897 100644
--- a/compiler/GHC/Rename/Env.hs
+++ b/compiler/GHC/Rename/Env.hs
@@ -1430,7 +1430,7 @@ lookupGreRn_maybe fos rdr_name
 {-
 
 Note [ Unbound vs Ambiguous Names ]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 lookupGreRn_maybe deals with failures in two different ways. If a name
 is unbound then we return a `Nothing` but if the name is ambiguous
 then we raise an error and return a dummy name.
diff --git a/compiler/GHC/Rename/Expr.hs b/compiler/GHC/Rename/Expr.hs
index 837d2b55e86a1a090500a089efa76052100b16a0..bb529c8066762c9c79cb09433f4eb158605bb06b 100644
--- a/compiler/GHC/Rename/Expr.hs
+++ b/compiler/GHC/Rename/Expr.hs
@@ -1720,7 +1720,7 @@ ApplicativeDo
 ************************************************************************
 
 Note [ApplicativeDo]
-
+~~~~~~~~~~~~~~~~~~~~
 = Example =
 
 For a sequence of statements
diff --git a/compiler/GHC/Rename/Unbound.hs b/compiler/GHC/Rename/Unbound.hs
index 5bbc2927aba91b05940b934a94f234a00bf2fb70..6d48ea3074511e57d61da63449e5b36b0253c092 100644
--- a/compiler/GHC/Rename/Unbound.hs
+++ b/compiler/GHC/Rename/Unbound.hs
@@ -232,6 +232,7 @@ similarNameSuggestions looking_for@(LF what_look where_look) dflags global_env
                                 ([],    [])   -> [] ]
 
               -- Note [Only-quals]
+              -- ~~~~~~~~~~~~~~~~~
               -- The second alternative returns those names with the same
               -- OccName as the one we tried, but live in *qualified* imports
               -- e.g. if you have:
diff --git a/compiler/GHC/Runtime/Eval.hs b/compiler/GHC/Runtime/Eval.hs
index b99ffe905a1a5992d5703a22f10b04fc5198c944..f95ef3a5d056f0bd5f629ab093180c5548f4ee08 100644
--- a/compiler/GHC/Runtime/Eval.hs
+++ b/compiler/GHC/Runtime/Eval.hs
@@ -691,7 +691,7 @@ pushResume hsc_env resume = hsc_env { hsc_IC = ictxt1 }
 
   {-
   Note [Syncing breakpoint info]
-
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   To display the values of the free variables for a single breakpoint, the
   function `GHC.Runtime.Eval.bindLocalsAtBreakpoint` pulls
   out the information from the fields `modBreaks_breakInfo` and
diff --git a/compiler/GHC/Runtime/Interpreter.hs b/compiler/GHC/Runtime/Interpreter.hs
index 10d2520f18d67ba6367143a3e3198724e21533b4..2c84980513a1f0063f4ccf79112fe5fdf486344e 100644
--- a/compiler/GHC/Runtime/Interpreter.hs
+++ b/compiler/GHC/Runtime/Interpreter.hs
@@ -121,7 +121,7 @@ import System.Process
 import GHC.Conc (pseq, par)
 
 {- Note [Remote GHCi]
-
+   ~~~~~~~~~~~~~~~~~~
 When the flag -fexternal-interpreter is given to GHC, interpreted code
 is run in a separate process called iserv, and we communicate with the
 external process over a pipe using Binary-encoded messages.
@@ -200,7 +200,7 @@ interpCmd interp msg = case interpInstance interp of
 
 
 -- Note [uninterruptibleMask_ and interpCmd]
---
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- If we receive an async exception, such as ^C, while communicating
 -- with the iserv process then we will be out-of-sync and not be able
 -- to recover.  Thus we use uninterruptibleMask_ during
@@ -510,6 +510,7 @@ unloadObj interp path = do
   interpCmd interp (UnloadObj path')
 
 -- Note [loadObj and relative paths]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- the iserv process might have a different current directory from the
 -- GHC process, so we must make paths absolute before sending them
 -- over.
@@ -635,7 +636,7 @@ runWithPipes createProc prog opts = do
 
 -- -----------------------------------------------------------------------------
 {- Note [External GHCi pointers]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We have the following ways to reference things in GHCi:
 
 HValue
diff --git a/compiler/GHC/StgToByteCode.hs b/compiler/GHC/StgToByteCode.hs
index a69fe69872fc06bae42e9dcfaf1697af7a66e108..ab5d0fb5bcad46a45a47ae0ec254044588ea2024 100644
--- a/compiler/GHC/StgToByteCode.hs
+++ b/compiler/GHC/StgToByteCode.hs
@@ -157,7 +157,7 @@ allocateTopStrings interp topStrings = do
 
 {-
 Note [generating code for top-level string literal bindings]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Here is a summary on how the byte code generator deals with top-level string
 literals:
 
@@ -1102,7 +1102,6 @@ layoutTuple profile start_off arg_ty reps =
 
 {- Note [unboxed tuple bytecodes and tuple_BCO]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
   We have the bytecode instructions RETURN_TUPLE and PUSH_ALTS_TUPLE to
   return and receive arbitrary unboxed tuples, respectively. These
   instructions use the helper data tuple_BCO and tuple_info.
@@ -1782,7 +1781,9 @@ mkMultiBranch maybe_ncons raw_ways = do
             | otherwise
             = return (testEQ (fst val) lbl_default `consOL` snd val)
 
-            -- Note [CASEFAIL] It may be that this case has no default
+            -- Note [CASEFAIL]
+            -- ~~~~~~~~~~~~~~~
+            -- It may be that this case has no default
             -- branch, but the alternatives are not exhaustive - this
             -- happens for GADT cases for example, where the types
             -- prove that certain branches are impossible.  We could
diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs
index 3f935c848d0aff68aed0e53718c2ef1c36974488..435f55106b6451553f086166bb33789e148677f6 100644
--- a/compiler/GHC/StgToCmm/Bind.hs
+++ b/compiler/GHC/StgToCmm/Bind.hs
@@ -149,7 +149,7 @@ cgBind (StgRec pairs)
         ;  emit (catAGraphs inits <*> body) }
 
 {- Note [cgBind rec]
-
+   ~~~~~~~~~~~~~~~~~
    Recursive let-bindings are tricky.
    Consider the following pseudocode:
 
@@ -268,7 +268,7 @@ for semi-obvious reasons.
 
 -}
 
----------- Note [Selectors] ------------------
+---------- See Note [Selectors] ------------------
 mkRhsClosure    profile bndr _cc
                 [NonVoid the_fv]                -- Just one free var
                 upd_flag                -- Updatable thunk
@@ -301,7 +301,7 @@ mkRhsClosure    profile bndr _cc
     let lf_info = mkSelectorLFInfo bndr offset_into_int (isUpdatable upd_flag)
     in cgRhsStdThunk bndr lf_info [StgVarArg the_fv]
 
----------- Note [Ap thunks] ------------------
+---------- See Note [Ap thunks] ------------------
 mkRhsClosure    profile bndr _cc
                 fvs
                 upd_flag
@@ -526,7 +526,6 @@ closureCodeBody top_lvl bndr cl_info cc args@(arg0:_) body fv_details
 
 -- Note [NodeReg clobbered with loopification]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- Previously we used to pass nodeReg (aka R1) here. With profiling, upon
 -- entering a closure, enterFunCCS was called with R1 passed to it. But since R1
 -- may get clobbered inside the body of a closure, and since a self-recursive
diff --git a/compiler/GHC/StgToCmm/Foreign.hs b/compiler/GHC/StgToCmm/Foreign.hs
index 4c414df9e9f91f0ab51ec5707ae20de47871864e..a1ee175bade1031e3871eb04e2a78d5ee5ceabc1 100644
--- a/compiler/GHC/StgToCmm/Foreign.hs
+++ b/compiler/GHC/StgToCmm/Foreign.hs
@@ -127,7 +127,7 @@ cgForeignCall (CCall (CCallSpec target cconv safety)) typ stg_args res_ty
          }
 
 {- Note [safe foreign call convention]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The simple thing to do for a safe foreign call would be the same as an
 unsafe one: just
 
diff --git a/compiler/GHC/StgToCmm/Heap.hs b/compiler/GHC/StgToCmm/Heap.hs
index 39a7812b707a4aee0728f3ab2fa7c8f1e8a00500..ec61ef240661cf107e4190c79937c2126a87134a 100644
--- a/compiler/GHC/StgToCmm/Heap.hs
+++ b/compiler/GHC/StgToCmm/Heap.hs
@@ -491,6 +491,7 @@ cannedGCEntryPoint platform regs
       _otherwise -> Nothing
 
 -- Note [stg_gc arguments]
+-- ~~~~~~~~~~~~~~~~~~~~~~~
 -- It might seem that we could avoid passing the arguments to the
 -- stg_gc function, because they are already in the right registers.
 -- While this is usually the case, it isn't always.  Sometimes the
@@ -670,7 +671,6 @@ do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
 
 -- Note [Self-recursive loop header]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- Self-recursive loop header is required by loopification optimization (See
 -- Note [Self-recursive tail calls] in GHC.StgToCmm.Expr). We emit it if:
 --
diff --git a/compiler/GHC/StgToCmm/Layout.hs b/compiler/GHC/StgToCmm/Layout.hs
index 9e14d1e766be9776a18a4f88fcea996e76747289..5664be908e3561a910e4e92ffe558d7fea616453 100644
--- a/compiler/GHC/StgToCmm/Layout.hs
+++ b/compiler/GHC/StgToCmm/Layout.hs
@@ -212,7 +212,7 @@ slowCall fun stg_args
                                       " with pat " ++ unpackFS rts_fun)
            return r
 
-        -- Note [avoid intermediate PAPs]
+        -- See Note [avoid intermediate PAPs]
         let n_args = length stg_args
         if n_args > arity && fast_pap
            then do
@@ -261,7 +261,7 @@ slowCall fun stg_args
 
 
 -- Note [avoid intermediate PAPs]
---
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- A slow call which needs multiple generic apply patterns will be
 -- almost guaranteed to create one or more intermediate PAPs when
 -- applied to a function that takes the correct number of arguments.
@@ -339,7 +339,7 @@ nonVArgs ((_,Just arg) : args) = arg : nonVArgs args
 
 {-
 Note [over-saturated calls]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The natural thing to do for an over-saturated call would be to call
 the function with the correct number of arguments, and then apply the
 remaining arguments to the value returned, e.g.
diff --git a/compiler/GHC/StgToCmm/Monad.hs b/compiler/GHC/StgToCmm/Monad.hs
index d8d66002681a3759eeb25d1d874c923ef54e3dd0..5ab12a4634632c033591a3ac16f4fe6395828d98 100644
--- a/compiler/GHC/StgToCmm/Monad.hs
+++ b/compiler/GHC/StgToCmm/Monad.hs
@@ -203,7 +203,7 @@ data ReturnKind
   | ReturnedTo BlockId ByteOff
 
 -- Note [sharing continuations]
---
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- ReturnKind says how the expression being compiled returned its
 -- results: either by assigning directly to the registers specified
 -- by the Sequel, or by returning to a continuation that does the
diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs
index 766a6e8a9c7550f47f9fe69994553ee4283bf05b..b980c0aacda0aed68fecd85918cfdd44515f9275 100644
--- a/compiler/GHC/StgToCmm/Prim.hs
+++ b/compiler/GHC/StgToCmm/Prim.hs
@@ -74,7 +74,7 @@ cgOpApp :: StgOp        -- The op
 -- Foreign calls
 cgOpApp (StgFCallOp fcall ty) stg_args res_ty
   = cgForeignCall fcall ty stg_args res_ty
-      -- Note [Foreign call results]
+      -- See Note [Foreign call results]
 
 cgOpApp (StgPrimOp primop) args res_ty = do
     cfg <- getStgToCmmConfig
@@ -1716,7 +1716,6 @@ emitPrimOp cfg primop =
   alwaysExternal = \_ -> PrimopCmmEmit_External
   -- Note [QuotRem optimization]
   -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
-  --
   -- `quot` and `rem` with constant divisor can be implemented with fast bit-ops
   -- (shift, .&.).
   --
diff --git a/compiler/GHC/SysTools.hs b/compiler/GHC/SysTools.hs
index 0b19d508254ac273773a7808d4e6a59de39c12a5..adc6e6c241a0a37967c0a00aa66cb80864eb6905 100644
--- a/compiler/GHC/SysTools.hs
+++ b/compiler/GHC/SysTools.hs
@@ -156,8 +156,8 @@ initSysTools top_dir = do
     Left (SettingsError_MissingData msg) -> pgmError msg
     Left (SettingsError_BadData msg) -> pgmError msg
 
-{- Note [Windows stack usage]
-
+{- Note [Windows stack allocations]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 See: #8870 (and #8834 for related info) and #12186
 
 On Windows, occasionally we need to grow the stack. In order to do
diff --git a/compiler/GHC/SysTools/BaseDir.hs b/compiler/GHC/SysTools/BaseDir.hs
index 54c42e7c52ff66319527776ebe3a0cd7cdb5fc66..312c029dd9f64c93a00d29961f3354f440d3f88e 100644
--- a/compiler/GHC/SysTools/BaseDir.hs
+++ b/compiler/GHC/SysTools/BaseDir.hs
@@ -35,7 +35,6 @@ import System.Directory (doesDirectoryExist)
 {-
 Note [topdir: How GHC finds its files]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 GHC needs various support files (library packages, RTS etc), plus
 various auxiliary programs (cp, gcc, etc).  It starts by finding topdir,
 the root of GHC's support files
@@ -54,7 +53,7 @@ from topdir we can find package.conf, ghc-asm, etc.
 
 
 Note [tooldir: How GHC finds mingw on Windows]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 GHC has some custom logic on Windows for finding the mingw
 toolchain and perl. Depending on whether GHC is built
 with the make build system or Hadrian, and on whether we're
diff --git a/compiler/GHC/SysTools/Info.hs b/compiler/GHC/SysTools/Info.hs
index 83a76b9efba40ed8227671ed46bc081ec7a63a83..81650495baa5821546a7025f7d5df93e8eb26182 100644
--- a/compiler/GHC/SysTools/Info.hs
+++ b/compiler/GHC/SysTools/Info.hs
@@ -26,7 +26,7 @@ import GHC.Prelude
 import GHC.SysTools.Process
 
 {- Note [Run-time linker info]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 See also: #5240, #6063, #10110
 
 Before 'runLink', we need to be sure to get the relevant information
@@ -57,7 +57,7 @@ circular dependency.
 -}
 
 {- Note [ELF needed shared libs]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Some distributions change the link editor's default handling of
 ELF DT_NEEDED tags to include only those shared objects that are
 needed to resolve undefined symbols. For Template Haskell we need
@@ -71,7 +71,7 @@ The flag is only needed on ELF systems. On Windows (PE) and Mac OS X
 -}
 
 {- Note [Windows static libGCC]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The GCC versions being upgraded to in #10726 are configured with
 dynamic linking of libgcc supported. This results in libgcc being
 linked dynamically when a shared library is created.
@@ -177,10 +177,10 @@ getLinkerInfo' logger dflags = do
             "-Wl,--hash-size=31"
           , "-Wl,--reduce-memory-overheads"
             -- Emit gcc stack checks
-            -- Note [Windows stack usage]
+            -- See Note [Windows stack allocations] in GHC.SysTools
           , "-fstack-check"
             -- Force static linking of libGCC
-            -- Note [Windows static libGCC]
+            -- See Note [Windows static libGCC]
           , "-static-libgcc" ]
       _ -> do
         -- In practice, we use the compiler as the linker here. Pass
diff --git a/compiler/GHC/Tc/Errors.hs b/compiler/GHC/Tc/Errors.hs
index 2901078004be46f73eecc5b53c804710e66f656b..70f5d0ddd72deb8a267cf0ee9f3cf263d9243e60 100644
--- a/compiler/GHC/Tc/Errors.hs
+++ b/compiler/GHC/Tc/Errors.hs
@@ -1910,7 +1910,7 @@ mkDictErr ctxt cts
     -- but we really only want to report the latter
     elim_superclasses cts = mkMinimalBySCs ctPred cts
 
--- [Note: mk_dict_err]
+-- Note [mk_dict_err]
 -- ~~~~~~~~~~~~~~~~~~~
 -- Different dictionary error messages are reported depending on the number of
 -- matches and unifiers:
diff --git a/compiler/GHC/Tc/Gen/Splice.hs b/compiler/GHC/Tc/Gen/Splice.hs
index fe6ec75568e3c12264f9cd97f6a2cd262c8ef5e8..747b3a7d98bfd11cb613f7b21d7c73d5249d28f8 100644
--- a/compiler/GHC/Tc/Gen/Splice.hs
+++ b/compiler/GHC/Tc/Gen/Splice.hs
@@ -1553,7 +1553,7 @@ readQResult i = do
     QFail str -> fail str
 
 {- Note [TH recover with -fexternal-interpreter]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Recover is slightly tricky to implement.
 
 The meaning of "recover a b" is
diff --git a/compiler/GHC/Tc/Instance/FunDeps.hs b/compiler/GHC/Tc/Instance/FunDeps.hs
index 9abfc31f0b4b19fedafe9e7600fd7de7457b5a9c..4b45f2fa389b7b841163e03b4ed4e58a0414a5b8 100644
--- a/compiler/GHC/Tc/Instance/FunDeps.hs
+++ b/compiler/GHC/Tc/Instance/FunDeps.hs
@@ -374,7 +374,7 @@ checkInstCoverage :: Bool   -- Be liberal
                   -> Class -> [PredType] -> [Type]
                   -> Validity
 -- "be_liberal" flag says whether to use "liberal" coverage of
---              See Note [Coverage Condition] below
+--              See Note [Coverage condition] below
 --
 -- Return values
 --    Nothing  => no problems
diff --git a/compiler/GHC/Tc/Solver/Interact.hs b/compiler/GHC/Tc/Solver/Interact.hs
index a088637e4664e6e720468021bff3ae0fdc9e707e..36e9afae98a408b6cea9e1301343212d84989d98 100644
--- a/compiler/GHC/Tc/Solver/Interact.hs
+++ b/compiler/GHC/Tc/Solver/Interact.hs
@@ -1632,8 +1632,8 @@ Now the second wanted comes along, but it cannot rewrite the given, so we simply
 At the end we spontaneously solve that guy, *reunifying*  [alpha := Int]
 
 We avoid this problem by orienting the resulting given so that the unification
-variable is on the left.  [Note that alternatively we could attempt to
-enforce this at canonicalization]
+variable is on the left (note that alternatively we could attempt to
+enforce this at canonicalization).
 
 See also Note [No touchables as FunEq RHS] in GHC.Tc.Solver.Monad; avoiding
 double unifications is the main reason we disallow touchable
@@ -2177,7 +2177,7 @@ chooseInstance work_item
        ; if isDerived ev
          then -- Use type-class instances for Deriveds, in the hope
               -- of generating some improvements
-              -- C.f. Example 3 of Note [The improvement story]
+              -- Cf. Example 3 of Note [The improvement story and derived shadows]
               -- It's easy because no evidence is involved
            do { dflags <- getDynFlags
               ; unless (subGoalDepthExceeded dflags (ctLocDepth deeper_loc)) $
diff --git a/compiler/GHC/Tc/Types.hs b/compiler/GHC/Tc/Types.hs
index 2de119b416c45a012f690a762eaf5cc2ab844cec..a784eb57192ce9bb3318ad5953623e0fa1e879c6 100644
--- a/compiler/GHC/Tc/Types.hs
+++ b/compiler/GHC/Tc/Types.hs
@@ -1046,7 +1046,7 @@ thLevel (Brack s _)   = thLevel s + 1
 thLevel (RunSplice _) = panic "thLevel: called when running a splice"
                         -- See Note [RunSplice ThLevel].
 
-{- Node [RunSplice ThLevel]
+{- Note [RunSplice ThLevel]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The 'RunSplice' stage is set when executing a splice, and only when running a
 splice. In particular it is not set when the splice is renamed or typechecked.
diff --git a/compiler/GHC/Tc/Types/Constraint.hs b/compiler/GHC/Tc/Types/Constraint.hs
index ffe14b3d62b97e944d9556ae029fde7c075fa7d9..f1d59bf04b70a9ccbba1a968706a55cf4e84ba67 100644
--- a/compiler/GHC/Tc/Types/Constraint.hs
+++ b/compiler/GHC/Tc/Types/Constraint.hs
@@ -2093,7 +2093,8 @@ but we do not want to complain about Bool ~ Char!
 Note [Deriveds do rewrite Deriveds]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 However we DO allow Deriveds to rewrite Deriveds, because that's how
-improvement works; see Note [The improvement story] in GHC.Tc.Solver.Interact.
+improvement works; see Note [The improvement story and derived shadows] in
+GHC.Tc.Solver.Interact.
 
 However, for now at least I'm only letting (Derived,NomEq) rewrite
 (Derived,NomEq) and not doing anything for ReprEq.  If we have
diff --git a/compiler/GHC/Tc/Utils/TcMType.hs b/compiler/GHC/Tc/Utils/TcMType.hs
index c510d29f63262842ae7ca553c887724441bdea03..22f9b14d266433aefe2d5571631ee325e6e29802 100644
--- a/compiler/GHC/Tc/Utils/TcMType.hs
+++ b/compiler/GHC/Tc/Utils/TcMType.hs
@@ -2257,7 +2257,7 @@ Consider this:
 
 * So we get a dict binding for Num (C d a), which is zonked to give
         a = ()
-  [Note Sept 04: now that we are zonking quantified type variables
+  Note (Sept 04): now that we are zonking quantified type variables
   on construction, the 'a' will be frozen as a regular tyvar on
   quantification, so the floated dict will still have type (C d a).
-  Which renders this whole note moot; happily!]
+  Which renders this whole note moot; happily!
diff --git a/compiler/GHC/ThToHs.hs b/compiler/GHC/ThToHs.hs
index 1020b5af3f5075afb24629e10b17b065dd10d47e..670371979703c7cd0498c567d013be7b60aa49bd 100644
--- a/compiler/GHC/ThToHs.hs
+++ b/compiler/GHC/ThToHs.hs
@@ -1110,6 +1110,7 @@ cvt_tup es boxity = do { let cvtl_maybe Nothing  = return (missingTupArg noAnn)
                                     boxity }
 
 {- Note [Operator association]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We must be quite careful about adding parens:
   * Infix (UInfix ...) op arg      Needs parens round the first arg
   * Infix (Infix ...) op arg       Needs parens round the first arg
diff --git a/compiler/GHC/Types/Name.hs b/compiler/GHC/Types/Name.hs
index b34f32de433b4622f17358003070e68a4d29b213..d919919e8143e6f1fc5d4b7cccc031a83a0c9f05 100644
--- a/compiler/GHC/Types/Name.hs
+++ b/compiler/GHC/Types/Name.hs
@@ -199,6 +199,7 @@ TL;DR: we make the `n_occ` field lazy.
 
 {-
 Note [About the NameSorts]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 1.  Initially, top-level Ids (including locally-defined ones) get External names,
     and all other local Ids get Internal names
diff --git a/compiler/GHC/Types/Name/Occurrence.hs b/compiler/GHC/Types/Name/Occurrence.hs
index cb984132799f155873809cb393b2257750d3df97..7aabf83dd930d343fae7e9ac32b580620b397214 100644
--- a/compiler/GHC/Types/Name/Occurrence.hs
+++ b/compiler/GHC/Types/Name/Occurrence.hs
@@ -132,6 +132,7 @@ data NameSpace = VarName        -- Variables, including "real" data constructors
                deriving( Eq, Ord )
 
 -- Note [Data Constructors]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~
 -- see also: Note [Data Constructor Naming] in GHC.Core.DataCon
 --
 -- $real_vs_source_data_constructors
diff --git a/compiler/GHC/Types/Tickish.hs b/compiler/GHC/Types/Tickish.hs
index 30827bb1fb85c21142d9e07b403526472621dbc2..480bb2befd5fa9c9b85afd16445c15b44c0f3eec 100644
--- a/compiler/GHC/Types/Tickish.hs
+++ b/compiler/GHC/Types/Tickish.hs
@@ -59,7 +59,7 @@ data TickishPass
 
 {-
    Note [Tickish passes]
-
+   ~~~~~~~~~~~~~~~~~~~~~
    Tickish annotations store different information depending on
    where they are used. Here's a summary of the differences
    between the passes.
diff --git a/compiler/GHC/Types/Var.hs b/compiler/GHC/Types/Var.hs
index 5ca0d0002832500711dc80ac5c8028df29e09260..0a6dc6079a5fec50b8edd3367c7c703eab79582a 100644
--- a/compiler/GHC/Types/Var.hs
+++ b/compiler/GHC/Types/Var.hs
@@ -620,7 +620,7 @@ Currently there are nine different uses of 'VarBndr':
 
 * TyCon.TyConTyCoBinder = VarBndr TyCoVar TyConBndrVis
   Binders of a PromotedDataCon
-  See Note [Promoted GADT data construtors] in GHC.Core.TyCon
+  See Note [Promoted GADT data constructors] in GHC.Core.TyCon
 
 * IfaceType.IfaceForAllBndr     = VarBndr IfaceBndr ArgFlag
 * IfaceType.IfaceForAllSpecBndr = VarBndr IfaceBndr Specificity
diff --git a/compiler/GHC/Types/Var/Env.hs b/compiler/GHC/Types/Var/Env.hs
index 683face5c96f4931da7db8dd4f09da6372b5c75e..55ea2a0dda6205b6efc000c80c2993df07d2f1de 100644
--- a/compiler/GHC/Types/Var/Env.hs
+++ b/compiler/GHC/Types/Var/Env.hs
@@ -324,6 +324,7 @@ rnBndr2_var (RV2 { envL = envL, envR = envR, in_scope = in_scope }) bL bR
           | otherwise                          = uniqAway' in_scope bL
 
         -- Note [Rebinding]
+        -- ~~~~~~~~~~~~~~~~
         -- If the new var is the same as the old one, note that
         -- the extendVarEnv *deletes* any current renaming
         -- E.g.   (\x. \x. ...)  ~  (\y. \z. ...)
diff --git a/compiler/GHC/Unit.hs b/compiler/GHC/Unit.hs
index 155d5b35255e4fa2b5c5c3be00c66c5ad4529901..4affdc33c8fc9919c245bac39a9486474cbae880 100644
--- a/compiler/GHC/Unit.hs
+++ b/compiler/GHC/Unit.hs
@@ -20,10 +20,8 @@ import GHC.Unit.Home
 import GHC.Unit.State
 
 {-
-
-Note [About Units]
+Note [About units]
 ~~~~~~~~~~~~~~~~~~
-
 Haskell users are used to manipulating Cabal packages. These packages are
 identified by:
    - a package name :: String
diff --git a/compiler/GHC/Unit/State.hs b/compiler/GHC/Unit/State.hs
index 8644848310949b966e051c154f2e2dee98d99312..3e74ae9936edadb12a06fe27c5ddc4859bb9e6b4 100644
--- a/compiler/GHC/Unit/State.hs
+++ b/compiler/GHC/Unit/State.hs
@@ -318,8 +318,8 @@ data UnitConfig = UnitConfig
    , unitConfigWays           :: !Ways          -- ^ Ways to use
 
    , unitConfigAllowVirtual   :: !Bool          -- ^ Allow virtual units
-      -- ^ Do we allow the use of virtual units instantiated on-the-fly (see Note
-      -- [About units] in GHC.Unit). This should only be true when we are
+      -- ^ Do we allow the use of virtual units instantiated on-the-fly (see
+      -- Note [About units] in GHC.Unit). This should only be true when we are
       -- type-checking an indefinite unit (not producing any code).
 
    , unitConfigProgramName    :: !String
@@ -1148,7 +1148,7 @@ findWiredInUnits logger prec_map pkgs vis_map = do
                            -- every non instantiated unit is an instance of
                            -- itself (required by Backpack...)
                            --
-                           -- See Note [About Units] in GHC.Unit
+                           -- See Note [About units] in GHC.Unit
                         }
                   | otherwise
                   = pkg
@@ -2180,7 +2180,7 @@ renameHoleModule :: UnitState -> ShHoleSubst -> Module -> Module
 renameHoleModule state = renameHoleModule' (unitInfoMap state) (preloadClosure state)
 
 -- | Substitutes holes in a 'Unit', suitable for renaming when
--- an include occurs; see Note [Representation of module/name variable].
+-- an include occurs; see Note [Representation of module/name variables].
 --
 -- @p[A=\<A>]@ maps to @p[A=\<B>]@ with @A=\<B>@.
 renameHoleUnit :: UnitState -> ShHoleSubst -> Unit -> Unit
diff --git a/compiler/GHC/Utils/Ppr.hs b/compiler/GHC/Utils/Ppr.hs
index 9b5b55658fbc213b88a4ea048d25ae89af0afdd0..af2b6f977a5687b4e92e36997ba0300148f8d77d 100644
--- a/compiler/GHC/Utils/Ppr.hs
+++ b/compiler/GHC/Utils/Ppr.hs
@@ -22,7 +22,7 @@
 
 {-
 Note [Differences between libraries/pretty and compiler/GHC/Utils/Ppr.hs]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 For historical reasons, there are two different copies of `Pretty` in the GHC
 source tree:
  * `libraries/pretty` is a submodule containing
@@ -441,7 +441,7 @@ braces p       = char '{' <> p <> char '}'
 
 {-
 Note [Print Hexadecimal Literals]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Relevant discussions:
  * Phabricator: https://phabricator.haskell.org/D4465
  * GHC Trac: https://gitlab.haskell.org/ghc/ghc/issues/14872
diff --git a/compiler/Language/Haskell/Syntax/Decls.hs b/compiler/Language/Haskell/Syntax/Decls.hs
index fbeebf9213a72cc91566707b18e334572ed229b9..b668d7fbff4e579d28a9a9c5d37682018ac72902 100644
--- a/compiler/Language/Haskell/Syntax/Decls.hs
+++ b/compiler/Language/Haskell/Syntax/Decls.hs
@@ -430,7 +430,7 @@ data TyClDecl pass
     DataDecl { tcdDExt     :: XDataDecl pass       -- ^ Post renamer, CUSK flag, FVs
              , tcdLName    :: LIdP pass             -- ^ Type constructor
              , tcdTyVars   :: LHsQTyVars pass      -- ^ Type variables
-                              -- See Note [TyVar binders for associated declarations]
+                              -- See Note [TyVar binders for associated decls]
              , tcdFixity   :: LexicalFixity        -- ^ Fixity used in the declaration
              , tcdDataDefn :: HsDataDefn pass }
 
@@ -826,7 +826,7 @@ data FamilyDecl pass = FamilyDecl
   , fdTopLevel       :: TopLevelFlag                 -- used for printing only
   , fdLName          :: LIdP pass                    -- type constructor
   , fdTyVars         :: LHsQTyVars pass              -- type variables
-                       -- See Note [TyVar binders for associated declarations]
+                       -- See Note [TyVar binders for associated decls]
   , fdFixity         :: LexicalFixity                -- Fixity used in the declaration
   , fdResultSig      :: LFamilyResultSig pass        -- result signature
   , fdInjectivityAnn :: Maybe (LInjectivityAnn pass) -- optional injectivity ann
diff --git a/compiler/Language/Haskell/Syntax/Expr.hs b/compiler/Language/Haskell/Syntax/Expr.hs
index 418aa59f84b340baf500c87ec7401e0603e0c0d9..049d5113184edc0b3196a51089c6cdccff140812 100644
--- a/compiler/Language/Haskell/Syntax/Expr.hs
+++ b/compiler/Language/Haskell/Syntax/Expr.hs
@@ -1508,7 +1508,7 @@ In any other context than 'MonadComp', the fields for most of these
 
 
 Note [Applicative BodyStmt]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
 (#12143) For the purposes of ApplicativeDo, we treat any BodyStmt
 as if it was a BindStmt with a wildcard pattern.  For example,
 
diff --git a/compiler/Language/Haskell/Syntax/Type.hs b/compiler/Language/Haskell/Syntax/Type.hs
index 173b75c4c2a3f3d8afbfe4b973a11eee8945c05c..10c2c03b48d2fe026a11f13abb80724f523d8588 100644
--- a/compiler/Language/Haskell/Syntax/Type.hs
+++ b/compiler/Language/Haskell/Syntax/Type.hs
@@ -1261,6 +1261,7 @@ instance (Outputable tm, Outputable ty) => Outputable (HsArg tm ty) where
   ppr (HsArgPar sp)     = text "HsArgPar"  <+> ppr sp
 {-
 Note [HsArgPar]
+~~~~~~~~~~~~~~~
 A HsArgPar indicates that everything to the left of this in the argument list is
 enclosed in parentheses together with the function itself. It is necessary so
 that we can recreate the parenthesis structure in the original source after
diff --git a/ghc.mk b/ghc.mk
index 961935f70dfa9994f29de4ac528a42cb39aa688a..4e6e0c2a12c45cc6423c952c795a95e3706b3d4a 100644
--- a/ghc.mk
+++ b/ghc.mk
@@ -515,9 +515,8 @@ INSTALL_PACKAGES += $(addprefix libraries/,$(PACKAGES_STAGE2))
 
 endif # CLEANING
 
-# -------------------------------------------
-# Note [Dependencies between package-data.mk files].
-
+# Note [Dependencies between package-data.mk files]
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # We cannot run ghc-cabal to configure a package until we have
 # configured and registered all of its dependencies.  So the following
 # hack forces all the configure steps to happen in exactly the following order:
@@ -1497,7 +1496,7 @@ endif
 	cd libraries/xhtml && rm -f Setup Setup.exe Setup.hi Setup.o
 
 # Note [No stage2 packages when CrossCompiling or Stage1Only]
-#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # (first read Note [CrossCompiling vs Stage1Only] and
 #  Note [Stage1Only vs stage=1] in mk/config.mk.in)
 #
diff --git a/ghc/GHCi/UI.hs b/ghc/GHCi/UI.hs
index 53838f39eb76dae2fe109960af3aaad7049036d6..9a62d53d1705041153a72ce315db8d086a467595 100644
--- a/ghc/GHCi/UI.hs
+++ b/ghc/GHCi/UI.hs
@@ -579,7 +579,7 @@ interactiveUI config srcs maybe_exprs = do
 
 {-
 Note [Changing language extensions for interactive evaluation]
---------------------------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 GHCi maintains two sets of options:
 
 - The "loading options" apply when loading modules
@@ -3353,7 +3353,6 @@ printTyThing tyth = printForUser (pprTyThing showToHeader tyth)
 {-
 Note [Filter bindings]
 ~~~~~~~~~~~~~~~~~~~~~~
-
 If we don't filter the bindings returned by the function GHC.getBindings,
 then the :show bindings command will also show unwanted bound names,
 internally generated by GHC, eg:
diff --git a/ghc/Main.hs b/ghc/Main.hs
index 69ec3a859393859e9b6d44ed11fc6e7c0de79633..cb701e24e2cc257afbfda9ca318dee7f8eb6dffd 100644
--- a/ghc/Main.hs
+++ b/ghc/Main.hs
@@ -567,7 +567,7 @@ parseModeFlags args = do
              Nothing     -> doMakeMode
              Just (m, _) -> m
 
-  -- See Note [Handling errors when parsing commandline flags]
+  -- See Note [Handling errors when parsing command-line flags]
   unless (null errs1 && null errs2) $ throwGhcException $ errorsToGhcException $
       map (("on the commandline", )) $ map (unLoc . errMsg) errs1 ++ errs2
 
diff --git a/hadrian/src/Expression.hs b/hadrian/src/Expression.hs
index 62e83ccecb536f40628f5bc0a11bd4e70b1f93a7..a70aa75e9da7443aa036cc192d832b8085733be8 100644
--- a/hadrian/src/Expression.hs
+++ b/hadrian/src/Expression.hs
@@ -100,7 +100,6 @@ way w = (w ==) <$> getWay
 {-
 Note [Stage Names]
 ~~~~~~~~~~~~~~~~~~
-
 Code referring to specific stages can be a bit tricky. In Hadrian, the stages
 have the same names they carried in the autoconf build system, but they are
 often referred to by the stage used to construct them. For example, the stage 1
diff --git a/hadrian/src/Flavour.hs b/hadrian/src/Flavour.hs
index f31e7667e1bc758a33c47ab0a7db745c714c0ff8..c46f77a2fb008f83c9dd76f2d3a14dbccd2a06af 100644
--- a/hadrian/src/Flavour.hs
+++ b/hadrian/src/Flavour.hs
@@ -267,7 +267,6 @@ collectTimings =
 {-
 Note [Hadrian settings]
 ~~~~~~~~~~~~~~~~~~~~~~~
-
 Hadrian lets one customize GHC builds through the UserSettings module,
 where Hadrian users can override existing 'Flavour's or create entirely
 new ones, overriding/extending the options passed to some builder
diff --git a/hadrian/src/Rules/BinaryDist.hs b/hadrian/src/Rules/BinaryDist.hs
index da6f27d1deaaec7791902105e8367c013d8bd973..a6693a75f3c4d23ef59f816fd6831e317150901f 100644
--- a/hadrian/src/Rules/BinaryDist.hs
+++ b/hadrian/src/Rules/BinaryDist.hs
@@ -463,7 +463,6 @@ createVersionWrapper versioned_exe install_path = do
 {-
 Note [Two Types of Wrappers]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 There are two different types of wrapper scripts.
 
 1. The wrapper scripts installed
diff --git a/hadrian/src/Rules/Libffi.hs b/hadrian/src/Rules/Libffi.hs
index df7529ffc5fdd9722e14ab1c1f22b1e55a609ecc..e37faee5dc079721740ecb8ad893f826b6415b0f 100644
--- a/hadrian/src/Rules/Libffi.hs
+++ b/hadrian/src/Rules/Libffi.hs
@@ -14,7 +14,7 @@ import Target
 import Utilities
 
 {- Note [Libffi indicating inputs]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 First see https://gitlab.haskell.org/ghc/ghc/wikis/Developing-Hadrian for an
 explanation of "indicating input". Part of the definition is copied here for
 your convenience:
diff --git a/libraries/base/Control/Concurrent/QSem.hs b/libraries/base/Control/Concurrent/QSem.hs
index 9f53eef826884eac51cf66c179b6ce1a1f51bc72..08524323e21bf6f43ac21f5d4a4a7e17814bcc86 100644
--- a/libraries/base/Control/Concurrent/QSem.hs
+++ b/libraries/base/Control/Concurrent/QSem.hs
@@ -100,7 +100,7 @@ signalQSem (QSem m) =
     putMVar m r'
 
 -- Note [signal uninterruptible]
---
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 --   If we have
 --
 --      bracket waitQSem signalQSem (...)
diff --git a/libraries/base/Control/Monad.hs b/libraries/base/Control/Monad.hs
index 86c15daf0060cecc8ec87b346e13d61f698491fd..d2d0e29e431f27ef442b69585e24af3d0204b98a 100644
--- a/libraries/base/Control/Monad.hs
+++ b/libraries/base/Control/Monad.hs
@@ -222,7 +222,7 @@ zipWithM f xs ys  =  sequenceA (zipWith f xs ys)
 zipWithM_         :: (Applicative m) => (a -> b -> m c) -> [a] -> [b] -> m ()
 {-# INLINE zipWithM_ #-}
 -- Inline so that fusion with zipWith and sequenceA have a chance to fire
--- See Note [Fusion for zipN/zipWithN] in List.hs]
+-- See Note [Fusion for zipN/zipWithN] in List.hs.
 zipWithM_ f xs ys =  sequenceA_ (zipWith f xs ys)
 
 {- | The 'foldM' function is analogous to 'Data.Foldable.foldl', except that its result is
@@ -262,7 +262,6 @@ foldM_ f a xs  = foldlM f a xs >> return ()
 {-
 Note [Worker/wrapper transform on replicateM/replicateM_]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 The implementations of replicateM and replicateM_ both leverage the
 worker/wrapper transform. The simpler implementation of replicateM_, as an
 example, would be:
diff --git a/libraries/base/Control/Monad/ST/Imp.hs b/libraries/base/Control/Monad/ST/Imp.hs
index cbcee8dd909035f4055e52ec3a1bbd839665ce77..9658fbd635386142992df798c365de515bdd6fb5 100644
--- a/libraries/base/Control/Monad/ST/Imp.hs
+++ b/libraries/base/Control/Monad/ST/Imp.hs
@@ -62,7 +62,6 @@ fixST k = unsafeIOToST $ do
 
 {- Note [fixST]
    ~~~~~~~~~~~~
-
 For many years, we implemented fixST much like a pure fixpoint,
 using liftST:
 
diff --git a/libraries/base/Control/Monad/ST/Lazy/Imp.hs b/libraries/base/Control/Monad/ST/Lazy/Imp.hs
index de8310c76d4671b55badf6a4f24cbe9cefd79232..888839132e63b105b5191b00c793cbd96a34add1 100644
--- a/libraries/base/Control/Monad/ST/Lazy/Imp.hs
+++ b/libraries/base/Control/Monad/ST/Lazy/Imp.hs
@@ -68,7 +68,7 @@ newtype ST s a = ST { unST :: State s -> (a, State s) }
 data State s = S# (State# s)
 
 {- Note [Lazy ST and multithreading]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We used to imagine that passing a polymorphic state token was all that we
 needed to keep state threads separate (see Launchbury and Peyton Jones, 1994:
 https://www.microsoft.com/en-us/research/publication/lazy-functional-state-threads/).
@@ -87,7 +87,7 @@ one we get from the previous computation, and the unlifted one we pull out of
 thin air. -}
 
 {- Note [Lazy ST: not producing lazy pairs]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The fixST and strictToLazyST functions used to construct functions that
 produced lazy pairs. Why don't we need that laziness? The ST type is kept
 abstract, so no one outside this module can ever get their hands on a (result,
diff --git a/libraries/base/Data/OldList.hs b/libraries/base/Data/OldList.hs
index 2a50de3ee599f54c5321e63942b47dceef4335c2..5ffdd84ad3141739b8fe265eb71b0ce0f82b23e2 100644
--- a/libraries/base/Data/OldList.hs
+++ b/libraries/base/Data/OldList.hs
@@ -1361,6 +1361,7 @@ singleton x = [x]
 --
 
 -- Note [INLINE unfoldr]
+-- ~~~~~~~~~~~~~~~~~~~~~
 -- We treat unfoldr a little differently from some other forms for list fusion
 -- for two reasons:
 --
diff --git a/libraries/base/Data/Typeable/Internal.hs b/libraries/base/Data/Typeable/Internal.hs
index 13853c930189d1833d0d5913ac4a3660790179a0..e9cb1a1937fd2a7f4fc987301604417ed8e6b010 100644
--- a/libraries/base/Data/Typeable/Internal.hs
+++ b/libraries/base/Data/Typeable/Internal.hs
@@ -275,7 +275,6 @@ having to walk their full structures.
 
 {- Note [Kind caching]
    ~~~~~~~~~~~~~~~~~~~
-
 We cache the kind of the TypeRep in each TrTyCon and TrApp constructor.
 This is necessary to ensure that typeRepKind (which is used, at least, in
 deserialization and dynApply) is cheap. There are two reasons for this:
diff --git a/libraries/base/GHC/Base.hs b/libraries/base/GHC/Base.hs
index 21ad469d277730ae9e41bc5d8b6a862962182824..bf1527076cdb2d9c22e387d483ce17cb32fcf032 100644
--- a/libraries/base/GHC/Base.hs
+++ b/libraries/base/GHC/Base.hs
@@ -150,7 +150,6 @@ default ()              -- Double isn't available yet
 {-
 Note [Depend on GHC.Num.Integer]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 The Integer type is special because GHC.CoreToStg.Prep.mkConvertNumLiteral
 lookups names in ghc-bignum interfaces to construct Integer literal values.
 Currently it reads the interface file whether or not the current module *has*
diff --git a/libraries/base/GHC/Bits.hs b/libraries/base/GHC/Bits.hs
index 012550a60bc478dd6cc0074479c75163412fc8f1..201340f34851f16e8305052a7cb8c5f714b511f4 100644
--- a/libraries/base/GHC/Bits.hs
+++ b/libraries/base/GHC/Bits.hs
@@ -668,8 +668,9 @@ isBitSubType x y
     ySigned = isSigned     y
 {-# INLINE isBitSubType #-}
 
-{-      Note [Constant folding for rotate]
-        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+{-
+Note [Constant folding for rotate]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The INLINE on the Int instance of rotate enables it to be constant
 folded.  For example:
      sumU . mapU (`rotate` 3) . replicateU 10000000 $ (7 :: Int)
diff --git a/libraries/base/GHC/Enum.hs b/libraries/base/GHC/Enum.hs
index 5c184256f4bf454933557dc1f375d09b8f73b811..d80689423c0cbeb07a0b9bd9d3f3fe6f348829b0 100644
--- a/libraries/base/GHC/Enum.hs
+++ b/libraries/base/GHC/Enum.hs
@@ -179,7 +179,6 @@ boundedEnumFromThen n1 n2
 {-
 Note [Stable Unfolding for list producers]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 The INLINABLE/INLINE pragmas ensure that we export stable (unoptimised)
 unfoldings in the interface file so we can do list fusion at usage sites.
 -}
@@ -892,6 +891,7 @@ instance  Enum Integer  where
  #-}
 
 {- Note [Enum Integer rules for literal 1]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The "1" rules above specialise for the common case where delta = 1,
 so that we can avoid the delta>=0 test in enumDeltaToIntegerFB.
 Then enumDeltaToInteger1FB is nice and small and can be inlined,
diff --git a/libraries/base/GHC/Event/Windows.hsc b/libraries/base/GHC/Event/Windows.hsc
index 973f25722bd86a3fede821e003ae27c7c58935ac..97a224fdc7c3157d7736981fc841283c8aff09d6 100644
--- a/libraries/base/GHC/Event/Windows.hsc
+++ b/libraries/base/GHC/Event/Windows.hsc
@@ -495,9 +495,8 @@ associateHandle Manager{..} h =
       FFI.associateHandleWithIOCP mgrIOCP h (fromIntegral $ ptrToWordPtr h)
 
 
-{- Note [Why use non-waiting getOverlappedResult requests.]
-   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
+{- Note [Why use non-waiting getOverlappedResult requests]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   When waiting for a request that is bound to be done soon
   we spin inside waitForCompletion. There are multiple reasons
   for this.
@@ -549,6 +548,7 @@ withOverlappedEx mgr fname h async offset startCB completionCB = do
               IOFailed  err -> signalThrow err
 
       -- Note [Memory Management]
+      -- ~~~~~~~~~~~~~~~~~~~~~~~~
       -- These callback data and especially the overlapped structs have to keep
       -- alive throughout the entire lifetime of the requests.   Since this
       -- function will block until done so it can call completionCB at the end
diff --git a/libraries/base/GHC/Event/Windows/FFI.hsc b/libraries/base/GHC/Event/Windows/FFI.hsc
index c087f94eb1b446b3e9bff72ad1c9ac28836a6f3c..e6ae168d3f5a4d55d2bec50c59b4c9fcbc7b6f88 100644
--- a/libraries/base/GHC/Event/Windows/FFI.hsc
+++ b/libraries/base/GHC/Event/Windows/FFI.hsc
@@ -136,6 +136,7 @@ foreign import WINDOWS_CCONV safe "windows.h GetQueuedCompletionStatusEx"
                                   -> Ptr ULONG -> DWORD -> BOOL -> IO BOOL
 
 -- | Note [Completion Ports]
+--   ~~~~~~~~~~~~~~~~~~~~~~~
 -- When an I/O operation has been queued by an operation
 -- (ReadFile/WriteFile/etc) it is placed in a queue that the driver uses when
 -- servicing IRQs.  This queue has some important properties:
@@ -343,7 +344,8 @@ pokeEventOverlapped lpol event = do
 ------------------------------------------------------------------------
 -- Request management
 
--- [Note AsyncHandles]
+-- Note [AsyncHandles]
+-- ~~~~~~~~~~~~~~~~~~~
 -- In `winio` we have designed it to work in asynchronous mode always.
 -- According to the MSDN documentation[1][2], when a handle is not opened
 -- in asynchronous mode then the operation would simply work but operate
@@ -392,7 +394,7 @@ pokeEventOverlapped lpol event = do
 --    invalid.  This is an issue because to pass `HANDLE`s we have to pass
 --    the native OS Handle not the Haskell one. i.e. remote-iserv.
 
--- See [Note AsyncHandles]
+-- See Note [AsyncHandles]
 withRequest :: Bool -> Word64 -> HANDLE -> IOCallback
             -> (Ptr HASKELL_OVERLAPPED -> Ptr CompletionData -> IO a)
             -> IO a
diff --git a/libraries/base/GHC/Foreign.hs b/libraries/base/GHC/Foreign.hs
index 658f5c3515a024763646d8a5eaee0076030cdf8c..042745ce1108b4d337ccb93a7b01141fd3d5cd09 100644
--- a/libraries/base/GHC/Foreign.hs
+++ b/libraries/base/GHC/Foreign.hs
@@ -295,7 +295,6 @@ tryFillBuffer encoder null_terminate from0 to_p to_sz_bytes = do
 {-
 Note [Check *before* fill in withEncodedCString]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 It's very important that the size check and readjustment peformed by tryFillBuffer
 happens before the continuation is called. The size check is the part which can
 fail, the call to the continuation never fails and so the caller should respond
diff --git a/libraries/base/GHC/ForeignPtr.hs b/libraries/base/GHC/ForeignPtr.hs
index a7061152dcc81cfab2c21f08210df6fc0563e655..0361857bcc3fd90a5fca5da7d45f204c173f51c3 100644
--- a/libraries/base/GHC/ForeignPtr.hs
+++ b/libraries/base/GHC/ForeignPtr.hs
@@ -165,7 +165,7 @@ data ForeignPtrContents
     -- The invariants that apply to 'MallocPtr' apply to 'PlainPtr' as well.
 
 -- Note [Why FinalPtr]
---
+-- ~~~~~~~~~~~~~~~~~~~
 -- FinalPtr exists as an optimization for foreign pointers created
 -- from Addr# literals. Most commonly, this happens in the bytestring
 -- library, where the combination of OverloadedStrings and a rewrite
@@ -376,7 +376,9 @@ addForeignPtrFinalizer (FunPtr fp) (ForeignPtr p c) = case c of
   MallocPtr     _ r -> insertCFinalizer r fp 0# nullAddr# p c
   _ -> errorWithoutStackTrace "GHC.ForeignPtr: attempt to add a finalizer to a plain pointer or a final pointer"
 
--- Note [MallocPtr finalizers] (#10904)
+-- Note [MallocPtr finalizers]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Ticket: #10904
 --
 -- When we have C finalizers for a MallocPtr, the memory is
 -- heap-resident and would normally be recovered by the GC before the
diff --git a/libraries/base/GHC/IO/Encoding/Failure.hs b/libraries/base/GHC/IO/Encoding/Failure.hs
index 271d66b35f77d549a2eaec4e877ffaacd9ddb969..a26e3f8fcc57cda55ad230af0f53e2b48223c6dc 100644
--- a/libraries/base/GHC/IO/Encoding/Failure.hs
+++ b/libraries/base/GHC/IO/Encoding/Failure.hs
@@ -58,7 +58,6 @@ data CodingFailureMode
 
 -- Note [Roundtripping]
 -- ~~~~~~~~~~~~~~~~~~~~
---
 -- Roundtripping is based on the ideas of PEP383.
 --
 -- We used to use the range of private-use characters from 0xEF80 to
diff --git a/libraries/base/GHC/IO/FD.hs b/libraries/base/GHC/IO/FD.hs
index 9e9fa428b88e23c50118efbf08d8889b8f0f2d2c..e97229704c0e7c2cdfe578f87c9efa5c59782474 100644
--- a/libraries/base/GHC/IO/FD.hs
+++ b/libraries/base/GHC/IO/FD.hs
@@ -537,8 +537,8 @@ fdWriteNonBlocking fd ptr _offset bytes = do
 #if !defined(mingw32_HOST_OS)
 
 {-
-NOTE [nonblock]:
-
+Note [nonblock]
+~~~~~~~~~~~~~~~
 Unix has broken semantics when it comes to non-blocking I/O: you can
 set the O_NONBLOCK flag on an FD, but it applies to the all other FDs
 attached to the same underlying file, pipe or TTY; there's no way to
diff --git a/libraries/base/GHC/IO/Handle/Internals.hs b/libraries/base/GHC/IO/Handle/Internals.hs
index 1bdb47b7cad2e087632272afffff970f63ec2e99..cbd43c166686703eb2d5ed182e720d9bb05c9b13 100644
--- a/libraries/base/GHC/IO/Handle/Internals.hs
+++ b/libraries/base/GHC/IO/Handle/Internals.hs
@@ -188,7 +188,7 @@ do_operation fun h act m = do
             throwIO e
 
 -- Note [async]
---
+-- ~~~~~~~~~~~~
 -- If an asynchronous exception is raised during an I/O operation,
 -- normally it is fine to just re-throw the exception synchronously.
 -- However, if we are inside an unsafePerformIO or an
@@ -644,7 +644,6 @@ flushByteReadBuffer h_@Handle__{..} = do
 
 {- Note [Making offsets for append]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
   The WINIO subysstem keeps track of offsets for handles
   on the Haskell side of things instead of letting the OS
   handle it. This requires us to establish the correct offset
diff --git a/libraries/base/GHC/IO/Handle/Text.hs b/libraries/base/GHC/IO/Handle/Text.hs
index bb576bcfd1b49006a31db8c2c0056ff3d979bc41..0e3dcd709ebedf7ef3b40e3a90327cc40bdcb537 100644
--- a/libraries/base/GHC/IO/Handle/Text.hs
+++ b/libraries/base/GHC/IO/Handle/Text.hs
@@ -320,7 +320,7 @@ unpack_nl !buf !r !w acc0
                 return (str, w)
 
 -- Note [#5536]
---
+-- ~~~~~~~~~~~~
 -- We originally had
 --
 --    let c' = desurrogatifyRoundtripCharacter c in
diff --git a/libraries/base/GHC/IO/Unsafe.hs b/libraries/base/GHC/IO/Unsafe.hs
index e6c43e920c5e0390e8af3044f5724c783284ecad..98d2e12345e30e6c0438e13dcd4849f9a420cc2c 100644
--- a/libraries/base/GHC/IO/Unsafe.hs
+++ b/libraries/base/GHC/IO/Unsafe.hs
@@ -27,8 +27,9 @@ module GHC.IO.Unsafe (
 
 import GHC.Base
 
-{- Note [unsafePerformIO and strictness]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+{-
+Note [unsafePerformIO and strictness]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Consider this sub-expression (from tests/lib/should_run/memo002)
 
  unsafePerformIO (do { lockMemoTable
diff --git a/libraries/base/GHC/IO/Windows/Handle.hsc b/libraries/base/GHC/IO/Windows/Handle.hsc
index bee7bc73a2a972eb8bc8838caf65ec4987671a37..784a08f9d237c9b245563feb330ffbb1879e6d71 100644
--- a/libraries/base/GHC/IO/Windows/Handle.hsc
+++ b/libraries/base/GHC/IO/Windows/Handle.hsc
@@ -526,6 +526,7 @@ hwndWriteNonBlocking hwnd ptr offset bytes
         | otherwise                        = Mgr.ioFailed err
 
 -- Note [ReadFile/WriteFile]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~
 -- The results of these functions are somewhat different when working in an
 -- asynchronous manner. The returning bool has two meaning.
 --
diff --git a/libraries/base/GHC/IORef.hs b/libraries/base/GHC/IORef.hs
index 616950c3c0de1be5e8740ec35fb97909a3727a37..db932fca90d366578f41124019ee6c97f3659995 100644
--- a/libraries/base/GHC/IORef.hs
+++ b/libraries/base/GHC/IORef.hs
@@ -127,7 +127,6 @@ atomicModifyIORef' ref f = do
 
 -- Note [atomicModifyIORef' definition]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- atomicModifyIORef' was historically defined
 --
 --    atomicModifyIORef' ref f = do
diff --git a/libraries/base/GHC/Int.hs b/libraries/base/GHC/Int.hs
index a514f4d35b97cfb4131fa6e10e378417b61d11a9..bde52848fd0071ac1cc5a048c2f84bb7fdaff68d 100644
--- a/libraries/base/GHC/Int.hs
+++ b/libraries/base/GHC/Int.hs
@@ -160,13 +160,13 @@ instance Integral Int8 where
         | otherwise                  = I8# (x# `modInt8#` y#)
     quotRem x@(I8# x#) y@(I8# y#)
         | y == 0                     = divZeroError
-          -- Note [Order of tests]
+          -- See Note [Order of tests]
         | y == (-1) && x == minBound = (overflowError, 0)
         | otherwise                  = case x# `quotRemInt8#` y# of
                                        (# q, r #) -> (I8# q, I8# r)
     divMod  x@(I8# x#) y@(I8# y#)
         | y == 0                     = divZeroError
-          -- Note [Order of tests]
+          -- See Note [Order of tests]
         | y == (-1) && x == minBound = (overflowError, 0)
         | otherwise                  = case  x# `divModInt8#` y# of
                                        (# d, m #) -> (I8# d, I8# m)
@@ -371,13 +371,13 @@ instance Integral Int16 where
         | otherwise                  = I16# (x# `modInt16#` y#)
     quotRem x@(I16# x#) y@(I16# y#)
         | y == 0                     = divZeroError
-          -- Note [Order of tests]
+          -- See Note [Order of tests]
         | y == (-1) && x == minBound = (overflowError, 0)
         | otherwise                  = case x# `quotRemInt16#` y# of
                                        (# q, r #) -> (I16# q, I16# r)
     divMod  x@(I16# x#) y@(I16# y#)
         | y == 0                     = divZeroError
-          -- Note [Order of tests]
+          -- See Note [Order of tests]
         | y == (-1) && x == minBound = (overflowError, 0)
         | otherwise                  = case x# `divModInt16#` y# of
                                        (# d, m #) -> (I16# d, I16# m)
@@ -579,13 +579,13 @@ instance Integral Int32 where
         | otherwise                  = I32# (x# `modInt32#` y#)
     quotRem x@(I32# x#) y@(I32# y#)
         | y == 0                     = divZeroError
-          -- Note [Order of tests]
+          -- See Note [Order of tests]
         | y == (-1) && x == minBound = (overflowError, 0)
         | otherwise                  = case x# `quotRemInt32#` y# of
                                        (# q, r #) -> (I32# q, I32# r)
     divMod  x@(I32# x#) y@(I32# y#)
         | y == 0                     = divZeroError
-          -- Note [Order of tests]
+          -- See Note [Order of tests]
         | y == (-1) && x == minBound = (overflowError, 0)
         | otherwise                  = case x# `divModInt32#` y# of
                                        (# d, m #) -> (I32# d, I32# m)
@@ -779,7 +779,7 @@ instance Enum Int64 where
 instance Integral Int64 where
     quot    x@(I64# x#) y@(I64# y#)
         | y == 0                     = divZeroError
-        | y == (-1) && x == minBound = overflowError -- Note [Order of tests]
+        | y == (-1) && x == minBound = overflowError -- See Note [Order of tests]
         | otherwise                  = I64# (x# `quotInt64#` y#)
     rem       (I64# x#) y@(I64# y#)
         | y == 0                     = divZeroError
@@ -791,7 +791,7 @@ instance Integral Int64 where
         | otherwise                  = I64# (x# `remInt64#` y#)
     div     x@(I64# x#) y@(I64# y#)
         | y == 0                     = divZeroError
-        | y == (-1) && x == minBound = overflowError -- Note [Order of tests]
+        | y == (-1) && x == minBound = overflowError -- See Note [Order of tests]
         | otherwise                  = I64# (x# `divInt64#` y#)
     mod       (I64# x#) y@(I64# y#)
         | y == 0                     = divZeroError
@@ -803,7 +803,7 @@ instance Integral Int64 where
         | otherwise                  = I64# (x# `modInt64#` y#)
     quotRem x@(I64# x#) y@(I64# y#)
         | y == 0                     = divZeroError
-          -- Note [Order of tests]
+          -- See Note [Order of tests]
         | y == (-1) && x == minBound = (overflowError, 0)
 #if WORD_SIZE_IN_BITS < 64
         -- we don't have quotRemInt64# primop yet
@@ -814,7 +814,7 @@ instance Integral Int64 where
 #endif
     divMod  x@(I64# x#) y@(I64# y#)
         | y == 0                     = divZeroError
-          -- Note [Order of tests]
+          -- See Note [Order of tests]
         | y == (-1) && x == minBound = (overflowError, 0)
 #if WORD_SIZE_IN_BITS < 64
         -- we don't have divModInt64# primop yet
@@ -952,8 +952,9 @@ instance Ix Int64 where
 
 -------------------------------------------------------------------------------
 
-{- Note [Order of tests]
-~~~~~~~~~~~~~~~~~~~~~~~~~
+{-
+Note [Order of tests]
+~~~~~~~~~~~~~~~~~~~~~
 (See #3065, #5161.) Suppose we had a definition like:
 
     quot x y
diff --git a/libraries/base/GHC/List.hs b/libraries/base/GHC/List.hs
index 677d0fe7c160d16d1da00dd16090aa871f358807..8dc897c73c425b0eb340756fbbe3b95da7cd67ac 100644
--- a/libraries/base/GHC/List.hs
+++ b/libraries/base/GHC/List.hs
@@ -284,7 +284,7 @@ foldl k z0 xs =
 
 {-
 Note [Left folds via right fold]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Implementing foldl et. al. via foldr is only a good idea if the compiler can
 optimize the resulting code (eta-expand the recursive "go"). See #7994.
 We hope that one of the two measure kick in:
@@ -309,7 +309,7 @@ inline FB functions because:
 * They are higher-order functions and therefore benefit from inlining.
 
 * When the final consumer is a left fold, inlining the FB functions is the only
-  way to make arity expansion happen. See Note [Left fold via right fold].
+  way to make arity expansion happen. See Note [Left folds via right fold].
 
 For this reason we mark all FB functions INLINE [0]. The [0] phase-specifier
 ensures that calls to FB functions can be written back to the original form
@@ -458,7 +458,7 @@ scanl                   = scanlGo
                                []   -> []
                                x:xs -> scanlGo f (f q x) xs)
 
--- Note [scanl rewrite rules]
+-- See Note [scanl rewrite rules]
 {-# RULES
 "scanl"  [~1] forall f a bs . scanl f a bs =
   build (\c n -> a `c` foldr (scanlFB f c) (constScanl n) bs a)
@@ -509,7 +509,7 @@ scanl' = scanlGo'
                             []   -> []
                             x:xs -> scanlGo' f (f q x) xs)
 
--- Note [scanl rewrite rules]
+-- See Note [scanl rewrite rules]
 {-# RULES
 "scanl'"  [~1] forall f a bs . scanl' f a bs =
   build (\c n -> a `c` foldr (scanlFB' f c) (flipSeqScanl' n) bs a)
@@ -529,7 +529,6 @@ flipSeqScanl' a !_b = a
 {-
 Note [scanl rewrite rules]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 In most cases, when we rewrite a form to one that can fuse, we try to rewrite it
 back to the original form if it does not fuse. For scanl, we do something a
 little different. In particular, we rewrite
@@ -651,7 +650,9 @@ scanrFB f c = \x ~(r, est) -> (f x r, r `c` est)
                  scanr f q0 ls
  #-}
 
-{- Note [scanrFB and evaluation]
+{-
+Note [scanrFB and evaluation]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 In a previous Version, the pattern match on the tuple in scanrFB used to be
 strict. If scanr is called with a build expression, the following would happen:
 The rule "scanr" would fire, and we obtain
@@ -1417,8 +1418,9 @@ foldr3_left _  z _ _  _      _     = z
                   foldr3 k z (build g) = g (foldr3_left k z) (\_ _ -> z)
  #-}
 
-{- Note [Fusion for foldrN]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
+{-
+Note [Fusion for foldrN]
+~~~~~~~~~~~~~~~~~~~~~~~~
 We arrange that foldr2, foldr3, etc is a good consumer for its first
 (left) list argument. Here's how. See below for the second, third
 etc list arguments
diff --git a/libraries/base/GHC/Read.hs b/libraries/base/GHC/Read.hs
index 7f698ec498974a9b02d09cbff93a34e2304aa5e8..2c9aec08cd2e62c22fc4fa77c24e5656043a87a0 100644
--- a/libraries/base/GHC/Read.hs
+++ b/libraries/base/GHC/Read.hs
@@ -410,7 +410,7 @@ readSymField fieldName readVal = do
 
 
 -- Note [Why readField]
---
+-- ~~~~~~~~~~~~~~~~~~~~
 -- Previously, the code for automatically deriving Read instance (in
 -- typecheck/GHC.Tc.Deriv.Generate.hs) would generate inline code for parsing fields;
 -- this, however, turned out to produce massive amounts of intermediate code,
diff --git a/libraries/base/GHC/Real.hs b/libraries/base/GHC/Real.hs
index d970a3e1edaac376baf5a1320ce09c3dcc9db178..e8cfbfbc57e133394b697a10794f3a70a69c98fd 100644
--- a/libraries/base/GHC/Real.hs
+++ b/libraries/base/GHC/Real.hs
@@ -301,7 +301,7 @@ numericEnumFromThenTo e1 e2 e3
                                            | otherwise = (>= e3 + mid)
 
 {- Note [Numeric Stability of Enumerating Floating Numbers]
------------------------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 When enumerate floating numbers, we could add the increment to the last number
 at every run (as what we did previously):
 
diff --git a/libraries/base/GHC/TopHandler.hs b/libraries/base/GHC/TopHandler.hs
index ba86798091c04151418b52e8f8d96fc8e87849e7..6a4e0325a622b9511639862d52c4f7b0d2df4b43 100644
--- a/libraries/base/GHC/TopHandler.hs
+++ b/libraries/base/GHC/TopHandler.hs
@@ -52,14 +52,13 @@ import Data.Dynamic (toDyn)
 
 -- Note [rts_setMainThread must be called unsafely]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- rts_setMainThread must be called as unsafe, because it
 -- dereferences the Weak# and manipulates the raw Haskell value
 -- behind it.  Therefore, it must not race with a garbage collection.
 
+
 -- Note [rts_setMainThread has an unsound type]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- 'rts_setMainThread' is imported with type Weak# ThreadId -> IO (),
 -- but this is an unsound type for it: it grabs the /key/ of the
 -- 'Weak#' object, which isn't tracked by the type at all.
@@ -212,9 +211,9 @@ disasterHandler exit _ =
         "encode an error message. Check that your locale is configured " ++
         "properly."
 
-{- Note [Disaster with iconv]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
+{-
+Note [Disaster with iconv]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 When using iconv, it's possible for things like iconv_open to fail in
 restricted environments (like an initram or restricted container), but
 when this happens the error raised inevitably calls `peekCString`,
diff --git a/libraries/base/GHC/TypeNats.hs b/libraries/base/GHC/TypeNats.hs
index 4325f3a8bf7656ab5e9399cec60025bc2038274e..ef4a5a8817586d47ddd2e593164e84bcf70d6a1e 100644
--- a/libraries/base/GHC/TypeNats.hs
+++ b/libraries/base/GHC/TypeNats.hs
@@ -92,8 +92,9 @@ someNatVal :: Natural -> SomeNat
 someNatVal n = withSNat SomeNat (SNat n) Proxy
 {-# NOINLINE someNatVal #-} -- See Note [NOINLINE someNatVal]
 
-{- Note [NOINLINE someNatVal]
-
+{-
+Note [NOINLINE someNatVal]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 `someNatVal` converts a natural number to an existentially quantified
 dictionary for `KnownNat` (aka `SomeNat`).  The existential quantification
 is very important, as it captures the fact that we don't know the type
diff --git a/libraries/base/System/IO.hs b/libraries/base/System/IO.hs
index f831df6cb4fd9f78f4a35d8f4ac03cc30c58adb6..6e8c4308d1aee67ca6e624dc5aa9448c8476f6ff 100644
--- a/libraries/base/System/IO.hs
+++ b/libraries/base/System/IO.hs
@@ -434,7 +434,6 @@ fixIO k = do
 
 -- Note [Blackholing in fixIO]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- We do our own explicit black holing here, because GHC's lazy
 -- blackholing isn't enough.  In an infinite loop, GHC may run the IO
 -- computation a few times before it notices the loop, which is wrong.
diff --git a/libraries/base/Unsafe/Coerce.hs b/libraries/base/Unsafe/Coerce.hs
index 9504eb13e2bc3fd4771fcea82d79a8455bdc8129..930514ce81a37cad8dd0a0a393e9687b980e39bf 100644
--- a/libraries/base/Unsafe/Coerce.hs
+++ b/libraries/base/Unsafe/Coerce.hs
@@ -286,7 +286,7 @@ unsafeCoerce# :: forall (r1 :: RuntimeRep) (r2 :: RuntimeRep)
                         (a :: TYPE r1) (b :: TYPE r2).
                  a -> b
 unsafeCoerce# = error "GHC internal error: unsafeCoerce# not unfolded"
--- See (U10) of Note [Implementing unsafeCorece]
+-- See (U10) of Note [Implementing unsafeCoerce]
 -- The RHS is updated by Desugar.patchMagicDefns
 -- See Desugar Note [Wiring in unsafeCoerce#]
 
diff --git a/libraries/base/cbits/inputReady.c b/libraries/base/cbits/inputReady.c
index 46b5577cfc905e11fa92a9dd6e01d71b99c63540..3f636c6b2875102e71e4d67fce2b4221c723fba2 100644
--- a/libraries/base/cbits/inputReady.c
+++ b/libraries/base/cbits/inputReady.c
@@ -168,7 +168,7 @@ fdReady(int fd, bool write, int64_t msecs, bool isSock)
     Time remaining = MSToTime(msecs);
 
     // Note [Guaranteed syscall time spent]
-    //
+    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     // The implementation ensures that if fdReady() is called with N `msecs`,
     // it will not return before an FD-polling syscall *returns*
     // with `endTime` having passed.
diff --git a/libraries/ghc-bignum/src/GHC/Num/Primitives.hs b/libraries/ghc-bignum/src/GHC/Num/Primitives.hs
index d286a8d9339eab3b6185c471e646054a96061e57..a33fd68b23c3a44f7c5007e5ce2f92d8ec182aaa 100644
--- a/libraries/ghc-bignum/src/GHC/Num/Primitives.hs
+++ b/libraries/ghc-bignum/src/GHC/Num/Primitives.hs
@@ -598,7 +598,6 @@ ioBool (IO io) s = case io s of
 
 -- Note [ghc-bignum exceptions]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- `ghc-bignum` package can't depend on `base` package (it would create a cyclic
 -- dependency). Hence it can't import "Control.Exception" and throw exceptions
 -- the usual way. Instead it uses some wired-in functions from `ghc-prim` which
diff --git a/libraries/ghc-boot/GHC/BaseDir.hs b/libraries/ghc-boot/GHC/BaseDir.hs
index db470425c0f4c3aaa72f712bd2312ba82fa97af1..dbbf61d02e09c1b80806c1a23963d479e74fbd7f 100644
--- a/libraries/ghc-boot/GHC/BaseDir.hs
+++ b/libraries/ghc-boot/GHC/BaseDir.hs
@@ -2,7 +2,6 @@
 
 -- | Note [Base Dir]
 -- ~~~~~~~~~~~~~~~~~
---
 -- GHC's base directory or top directory containers miscellaneous settings and
 -- the package database.  The main compiler of course needs this directory to
 -- read those settings and read and write packages. ghc-pkg uses it to find the
@@ -12,6 +11,7 @@
 -- will expand `${top_dir}` inside strings so GHC doesn't need to know it's on
 -- installation location at build time. ghc-pkg also can expand those variables
 -- and so needs the top dir location to do that too.
+
 module GHC.BaseDir where
 
 import Prelude -- See Note [Why do we import Prelude here?]
diff --git a/libraries/ghc-prim/GHC/CString.hs b/libraries/ghc-prim/GHC/CString.hs
index 1edeecbbfac1db134a9a33fb9a14e4aeff3d8d0e..680d3c8a39aad16fc6e9e872b9f8e9b5afe67504 100644
--- a/libraries/ghc-prim/GHC/CString.hs
+++ b/libraries/ghc-prim/GHC/CString.hs
@@ -135,9 +135,9 @@ Moreover, we want to make it CONLIKE, so that:
 All of this goes for unpackCStringUtf8# too.
 -}
 
-{- Note [Inlining of unpackFoldrCString]
+{-
+Note [Inlining of unpackFoldrCString]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 Usually the unpack-list rule turns unpackFoldrCString# into unpackCString#
 It also has a BuiltInRule in PrelRules.hs:
      unpackFoldrCString# "foo" c (unpackFoldrCString# "baz" c n)
@@ -154,9 +154,8 @@ when looking at nofib.
 This is especially important for elem which then results in an
 allocation free loop.
 
-  Note [unpackCString# iterating over addr]
+Note [unpackCString# iterating over addr]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 When unpacking unpackCString# and friends repeatedly return a cons cell
 containing:
 * The current character we just unpacked.
@@ -175,7 +174,6 @@ the string and the current offset, saving a word for each character unpacked.
 
 This has the additional advantage the we can guarantee that only the
 increment will happen in the loop.
-
 -}
 
 unpackCString# :: Addr# -> [Char]
diff --git a/libraries/ghc-prim/GHC/Classes.hs b/libraries/ghc-prim/GHC/Classes.hs
index 13e9556864de0ed646f1a2b8ff536bfd7bc11c84..aa1c1b2d8b78a6076614ee71d27a811ccb984365 100644
--- a/libraries/ghc-prim/GHC/Classes.hs
+++ b/libraries/ghc-prim/GHC/Classes.hs
@@ -587,7 +587,6 @@ x# `divInt32#` y# = ((x# `plusInt32#` bias#) `quotInt32#` y#) `subInt32#` hard#
 
 -- Note [divInt# implementation]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- divInt# (truncated toward zero) is implemented with quotInt# (truncated
 -- toward negative infinity). They differ when inputs x and y have different signs:
 --  - x `rem` y has the sign of x and (x `quot` y)*y + (x `rem` y) == x
@@ -705,7 +704,6 @@ x# `modInt32#` y# = r# `plusInt32#` k#
 
 -- Note [modInt# implementation]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- Similarly to divInt# (see Note [divInt# implementation]), we can derive the
 -- branchless implementation of modInt# as follows:
 --
@@ -823,7 +821,6 @@ x# `divModInt32#` y# = case (x# `plusInt32#` bias#) `quotRemInt32#` y# of
 
 -- Note [divModInt# implementation]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- divModInt# is written by deriving the following code similarly to divInt# and
 -- modInt# (see Note [divInt# implementation] and Note [modInt#
 -- implementation]).
diff --git a/libraries/ghc-prim/GHC/Prim/Exception.hs b/libraries/ghc-prim/GHC/Prim/Exception.hs
index 592d597f444d3cd284ba63a7b5d3438da2310bc0..9d496d397cd0c385417861628afedc2bd3cfbccd 100644
--- a/libraries/ghc-prim/GHC/Prim/Exception.hs
+++ b/libraries/ghc-prim/GHC/Prim/Exception.hs
@@ -20,7 +20,6 @@ default () -- Double and Integer aren't available yet
 
 -- Note [Arithmetic exceptions]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- ghc-prim provides several functions to raise arithmetic exceptions
 -- (raiseDivZero, raiseUnderflow, raiseOverflow) that are wired-in the RTS.
 -- These exceptions are meant to be used by the package implementing arbitrary
diff --git a/libraries/ghc-prim/GHC/Prim/PtrEq.hs b/libraries/ghc-prim/GHC/Prim/PtrEq.hs
index 5cc3e511e63bf36b5af4499171bd61ff3b06417a..ba5885786898f3b938c89fd0903e6d8245b4fa34 100644
--- a/libraries/ghc-prim/GHC/Prim/PtrEq.hs
+++ b/libraries/ghc-prim/GHC/Prim/PtrEq.hs
@@ -45,7 +45,6 @@ default () -- Double and Integer aren't available yet
 
 {- Note [Pointer equality operations]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 Many primitive types - such as Array#, ByteArray#, MVar#, ... - are boxed:
 they are represented by pointers to the underlying data. It is thus possible
 to directly compare these pointers for equality, as opposed to comparing
@@ -55,7 +54,7 @@ two arrays element-wise).
 To do this, GHC provides the primop reallyUnsafePtrEquality#, which is
 both levity-polymorphic and heterogeneous. As its name indicates, it is an
 unsafe operation which can yield unpredictable results, as explained in
-  Note [Pointer comparison operations] in primops.txt.pp
+Note [Pointer comparison operations] in primops.txt.pp
 
 For a more user-friendly interface, this module defines specialisations of
 the reallyUnsafePtrEquality# primop at various primitive types, such as
@@ -116,7 +115,6 @@ sameIOPort# = reallyUnsafePtrEquality#
 
 -- Note [Comparing stable names]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
 -- A StableName# is actually a pointer to a stable name object (SNO)
 -- containing an index into the stable name table (SNT). We
 -- used to compare StableName#s by following the pointers to the
diff --git a/libraries/ghc-prim/GHC/Types.hs b/libraries/ghc-prim/GHC/Types.hs
index 22e637af6e0e82799578d4a05c2a1a9affa0283c..eaffc5c6d06ceee2d6c2c56b7f289ecec077ffdf 100644
--- a/libraries/ghc-prim/GHC/Types.hs
+++ b/libraries/ghc-prim/GHC/Types.hs
@@ -84,7 +84,7 @@ This declaration is important for :info (->) command (issue #10145)
 
 -- | The regular function type
 type (->) = FUN 'Many
--- See Note [Linear Types] in Multiplicity
+-- See Note [Linear types] in Multiplicity
 
 {- *********************************************************************
 *                                                                      *
@@ -261,7 +261,6 @@ newtype IO a = IO (State# RealWorld -> (# State# RealWorld, a #))
 {-
 Note [Kind-changing of (~) and Coercible]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 (~) and Coercible are tricky to define. To the user, they must appear as
 constraints, but we cannot define them as such in Haskell. But we also cannot
 just define them only in GHC.Prim (like (->)), because we need a real module
diff --git a/libraries/ghc-prim/cbits/atomic.c b/libraries/ghc-prim/cbits/atomic.c
index af26e16268d107669c8119ab6647ee4d29da524f..2ac6d26e1fe7a3d500c56906077ab4cf388d4e98 100644
--- a/libraries/ghc-prim/cbits/atomic.c
+++ b/libraries/ghc-prim/cbits/atomic.c
@@ -110,7 +110,6 @@ hs_atomic_and64(StgWord x, StgWord64 val)
 
 // Note [__sync_fetch_and_nand usage]
 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-//
 // The __sync_fetch_and_nand builtin is a bit of a disaster. It was introduced
 // in GCC long ago with silly semantics. Specifically:
 //
diff --git a/libraries/ghc-prim/cbits/bitrev.c b/libraries/ghc-prim/cbits/bitrev.c
index eecbfe5ac55bb03b1bc698a54d9dc06911ac3048..2fcbd192b326461b3917ed86e8d7577b5aecfa91 100644
--- a/libraries/ghc-prim/cbits/bitrev.c
+++ b/libraries/ghc-prim/cbits/bitrev.c
@@ -3,7 +3,6 @@
 /*
 Note [Bit reversal primop]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 There are two main ways of reversing the bit order of a word: bit twiddling
 and using a lookup table.
 See [this excellent](https://stackoverflow.com/questions/746171/most-efficient-algorithm-for-bit-reversal-from-msb-lsb-to-lsb-msb-in-c this)
diff --git a/libraries/ghci/GHCi/TH.hs b/libraries/ghci/GHCi/TH.hs
index 723e966095276d0cac1e5314d564c8957be83a33..77ddd0ccc84e5ffd96a9ec2299a8260a7a2520db 100644
--- a/libraries/ghci/GHCi/TH.hs
+++ b/libraries/ghci/GHCi/TH.hs
@@ -13,7 +13,7 @@ module GHCi.TH
   ) where
 
 {- Note [Remote Template Haskell]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Here is an overview of how TH works with -fexternal-interpreter.
 
 Initialisation
diff --git a/libraries/template-haskell/Language/Haskell/TH/Syntax.hs b/libraries/template-haskell/Language/Haskell/TH/Syntax.hs
index f30bb0ef876ccfa0334f909a0d667fea4d12d043..7446297762805b61a4d4ee6f5ec01fc3e49a0fd4 100644
--- a/libraries/template-haskell/Language/Haskell/TH/Syntax.hs
+++ b/libraries/template-haskell/Language/Haskell/TH/Syntax.hs
@@ -2592,7 +2592,6 @@ data Con = NormalC Name [BangType]       -- ^ @C Int a@
 
 -- Note [GADT return type]
 -- ~~~~~~~~~~~~~~~~~~~~~~~
---
 -- The return type of a GADT constructor does not necessarily match the name of
 -- the data type:
 --
diff --git a/m4/fptools_set_haskell_platform_vars.m4 b/m4/fptools_set_haskell_platform_vars.m4
index cd44838358109f0bd8fea7d84f7798997ffae781..62563633f54564eed5b8ea20c2d9265045ba5fda 100644
--- a/m4/fptools_set_haskell_platform_vars.m4
+++ b/m4/fptools_set_haskell_platform_vars.m4
@@ -116,7 +116,6 @@ AC_DEFUN([FPTOOLS_SET_HASKELL_PLATFORM_VARS_SHELL_FUNCTIONS],
 
 # Note [autoconf assembler checks and -flto]
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-#
 # Autoconf's AC_COMPILE_IFELSE macro is fragile in the case of checks
 # which require that the assembler is run. Specifically, GCC does not run
 # the assembler if invoked with `-c -flto`; it merely dumps its internal
diff --git a/mk/config.mk.in b/mk/config.mk.in
index c13332169de1d92fcc70292342836145b16a1884..94761796066b973ef8e71105488734bdd02a1c29 100644
--- a/mk/config.mk.in
+++ b/mk/config.mk.in
@@ -74,8 +74,8 @@ GhcStage2HcOpts=-O2 -haddock
 GhcStage3HcOpts=-O2 -haddock
 
 
-# Note [Stage number in build variables].
-#
+# Note [Stage number in build variables]
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # There are (unfortunately) two different naming schemes for build variables
 # specific to a certain stage.
 #
@@ -551,7 +551,7 @@ CrossCompilePrefix    = $(if $(filter YES,$(Stage1Only)),@CrossCompilePrefix@,)
 INSTALL_GHC_STAGE= $(if $(filter YES,$(Stage1Only)),1,2)
 
 # Note [CrossCompiling vs Stage1Only]
-#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # There are 4 possible settings:
 #
 # 1 CrossCompiling=NO Stage1Only=NO
@@ -570,7 +570,7 @@ INSTALL_GHC_STAGE= $(if $(filter YES,$(Stage1Only)),1,2)
 # [1] https://gitlab.haskell.org/ghc/ghc/wikis/building/cross-compiling
 
 # Note [Stage1Only vs stage=1]
-#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # Stage1Only=YES means:
 #   - don't build ghc-stage2 (the executable)
 #   - don't build utils that rely on ghc-stage2
@@ -891,6 +891,7 @@ GhcLibHcOpts=
 endif
 
 # Note [Disable -O2 in unregisterised mode]
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # Disable -O2 optimization in unregisterised mode. Otherwise amount
 # of generated C code # makes things very slow to compile (~5 minutes
 # on core-i7 for 'compiler/GHC/Hs/Expr.hs') and sometimes not compile
diff --git a/mk/warnings.mk b/mk/warnings.mk
index ed549aa33a23b16e394355cf30885d50f0de2311..bdbdc79213edbfb11c77cb8c975cd489d5e13419 100644
--- a/mk/warnings.mk
+++ b/mk/warnings.mk
@@ -133,7 +133,7 @@ GhcLibExtraHcOpts += -Wno-deprecated-flags
 GhcBootLibExtraHcOpts += -fno-warn-deprecated-flags
 
 # Note [Order of warning flags]
-#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # In distdir-way-opts, build flags are added in the following order (this
 # list is not exhaustive):
 #
diff --git a/rts/Apply.cmm b/rts/Apply.cmm
index a706c68194b0c5470229be3c02b0ec6ce073b9c7..4c3177ae2f598e742c08f925dd9bede84b4246b4 100644
--- a/rts/Apply.cmm
+++ b/rts/Apply.cmm
@@ -38,7 +38,7 @@ stg_ap_0_fast ( P_ fun )
 
 /*
   Note [Evaluating functions with profiling]
-
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   If we evaluate something like
 
     let f = {-# SCC "f" #-} g
@@ -461,8 +461,8 @@ for:
    -------------------------------------------------------------------------- */
 
 /*
- Note [AP_STACKs must be eagerly blackholed]
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Note [AP_STACKs must be eagerly blackholed]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #13615 describes a nasty concurrency issue where we can enter into the
 middle of an ST action multiple times, resulting in duplication of effects.
 In short, the construction of an AP_STACK allows us to suspend a computation
diff --git a/rts/Capability.c b/rts/Capability.c
index 374dfe8de7b3c1cfc18e9b726c3f1155293099b3..7ebe51609fbd4f2f5009b8af5d1c7f2e0969df87 100644
--- a/rts/Capability.c
+++ b/rts/Capability.c
@@ -660,7 +660,6 @@ enqueueWorker (Capability* cap USED_IF_THREADS)
 /*
  * Note [Benign data race due to work-pushing]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
  * #17276 points out a tricky data race (noticed by ThreadSanitizer) between
  * waitForWorkerCapability and schedulePushWork. In short, schedulePushWork
  * works as follows:
@@ -1039,7 +1038,6 @@ yieldCapability
 /*
  * Note [migrated bound threads]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
  * There's a tricky case where:
  *    - cap A is running an unbound thread T1
  *    - there is a bound thread T2 at the head of the run queue on cap A
@@ -1060,7 +1058,6 @@ yieldCapability
  *
  * Note [migrated bound threads 2]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
  * Second tricky case;
  *   - A bound Task becomes a GC thread
  *   - scheduleDoGC() migrates the thread belonging to this Task,
diff --git a/rts/Compact.cmm b/rts/Compact.cmm
index bae94a03cd8c3189d1368af61baffd1fdaabe5f5..8a358e1da531f0c52b64ee512cb127ff41b3e645 100644
--- a/rts/Compact.cmm
+++ b/rts/Compact.cmm
@@ -297,7 +297,7 @@ stg_compactAddWithSharingzh (P_ compact, P_ p)
     StgCompactNFData_hash(compact) = hash;
 
     // Note [compactAddWorker result]
-    //
+    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     // compactAddWorker needs somewhere to store the result - this is
     // so that it can be tail-recursive.  It must be an address that
     // doesn't move during GC, so we can't use heap or stack.
diff --git a/rts/ForeignExports.c b/rts/ForeignExports.c
index e218281b51399a7b5d0f5ca5196cd7918fc53c56..e4d7d9a39a0d50d2ddd511d1e69aa16e1c73f4d1 100644
--- a/rts/ForeignExports.c
+++ b/rts/ForeignExports.c
@@ -17,7 +17,6 @@ static ObjectCode *loading_obj = NULL;
 /*
  * Note [Tracking foreign exports]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
  * Foreign exports are garbage collection roots. That is, things (e.g. CAFs)
  * depended upon by a module's `foreign export`s need to be kept alive for as
  * long an module is loaded. To ensure this we create a stable pointer to each
diff --git a/rts/IPE.c b/rts/IPE.c
index 3557b0f33f6689f2600c0c58d5f211450b4489d4..5ab8a861fd126038e36b25b1c08407f2ad8c6034 100644
--- a/rts/IPE.c
+++ b/rts/IPE.c
@@ -27,7 +27,6 @@
 /*
 Note [The Info Table Provenance Entry (IPE) Map]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 IPEs are stored in a hash map from info table address (pointer) to IPE. This
 ensures cheap lookup and traversal.
 
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index bcda08018a3b11722bc4a3f04449ff2e6e5e6002..c911d99367175bafc07a7ba3fb99a3b6bffe9cd6 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -103,7 +103,7 @@
 #endif
 
 // Note [Not true: ASSERT(Sp > SpLim)]
-//
+// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 // SpLim has some headroom (RESERVED_STACK_WORDS) to allow for saving
 // any necessary state on the stack when returning to the scheduler
 // when a stack check fails..  The upshot of this is that Sp could be
@@ -117,7 +117,7 @@
    return cap;
 
 // Note [avoiding threadPaused]
-//
+// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 // Switching between the interpreter to compiled code can happen very
 // frequently, so we don't want to call threadPaused(), which is
 // expensive.  BUT we must be careful not to violate the invariant
@@ -1678,7 +1678,7 @@ run_BCO:
             SET_HDR(con, (StgInfoTable*)BCO_LIT(o_itbl), cap->r.rCCCS);
 
             // Note [Data constructor dynamic tags]
-            //
+            // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
             // compute the pointer tag for the constructor and tag the pointer
             //
             //     - 1..(TAG_MASK-1): for first TAG_MASK-1 constructors
diff --git a/rts/Linker.c b/rts/Linker.c
index e30fadb26205b619b0b13d325b688e638ae3673f..6c13213092fcbe94bcc4838a62e362debbdb3c70 100644
--- a/rts/Linker.c
+++ b/rts/Linker.c
@@ -80,7 +80,7 @@
 #endif
 /*
    Note [runtime-linker-support]
-   -----------------------------
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    When adding support for a new platform to the runtime linker please
    update `$TOP/configure.ac` under heading `Does target have runtime
    linker support?`.
@@ -94,7 +94,7 @@
    addresses of unloaded symbols.
 
    Note [runtime-linker-phases]
-   --------------------------------------
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Broadly the behavior of the runtime linker can be
    split into the following four phases:
 
@@ -247,7 +247,7 @@ static void ghciRemoveSymbolTable(StrHashTable *table, const SymbolName* key,
  */
 /*
  Note [weak-symbols-support]
- -------------------------------------
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
  While ghciInsertSymbolTable does implement extensive
  logic for weak symbol support, weak symbols are not currently
  fully supported by the RTS. This code is mostly here for COMDAT
@@ -610,7 +610,7 @@ internal_dlopen(const char *dll_name)
 
 /*
   Note [RTLD_LOCAL]
-
+  ~~~~~~~~~~~~~~~~~
   In GHCi we want to be able to override previous .so's with newly
   loaded .so's when we recompile something.  This further implies that
   when we look up a symbol in internal_dlsym() we have to iterate
@@ -1716,6 +1716,7 @@ HsInt loadOc (ObjectCode* oc)
    }
 
    /* Note [loadOc orderings]
+      ~~~~~~~~~~~~~~~~~~~~~~~
       The order of `ocAllocateExtras` and `ocGetNames` matters. For MachO
       and ELF, `ocInit` and `ocGetNames` initialize a bunch of pointers based
       on the offset to `oc->image`, but `ocAllocateExtras` may relocate
diff --git a/rts/LinkerInternals.h b/rts/LinkerInternals.h
index 7058ad355b62313834dc90501a3addeb04617297..f3d918e355c799b173a2c1cafba30fd82a287a72 100644
--- a/rts/LinkerInternals.h
+++ b/rts/LinkerInternals.h
@@ -118,6 +118,7 @@ typedef enum {
 
 /*
  * Note [No typedefs for customizable types]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  * Some pointer-to-struct types are defined opaquely
  * first, and customized later to architecture/ABI-specific
  * instantiations. Having the usual
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index c5b6065ec284081a407cb67d96cb529eea75fb06..84c5850f9711caf5a4e818009695f2fe1fd3cf62 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -166,7 +166,6 @@ stg_isMutableByteArrayPinnedzh ( gcptr mba )
 
 /* Note [LDV profiling and resizing arrays]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
  * As far as the LDV profiler is concerned arrays are "inherently used" which
  * means we don't track their time of use and eventual destruction. We just
  * assume they get used.
@@ -1562,7 +1561,7 @@ stg_writeTVarzh (P_ tvar,     /* :: TVar a */
  * exception and never perform its take or put, and we'd end up with a
  * deadlock.
  *
- * Note [Nonmoving write barrier in Perform{Take,Put}]
+ * Note [Nonmoving write barrier in Perform{Put,Take}]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  * As noted in Note [Non-moving garbage collector] in NonMoving.c, the
  * non-moving GC requires that all overwritten pointers be pushed to the update
@@ -1825,7 +1824,7 @@ stg_putMVarzh ( P_ mvar, /* :: MVar a */
         StgMVarTSOQueue_tso(q)  = CurrentTSO;
 
         SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
-        //See Note [Heap memory barriers]
+        // See Note [Heap memory barriers]
         prim_write_barrier;
 
         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
@@ -2036,7 +2035,7 @@ stg_readMVarzh ( P_ mvar, /* :: MVar a */ )
         StgMVarTSOQueue_tso(q)  = CurrentTSO;
 
         SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
-        //See Note [Heap memory barriers]
+        // See Note [Heap memory barriers]
         prim_write_barrier;
 
         StgTSO__link(CurrentTSO)       = q;
@@ -2169,7 +2168,7 @@ stg_readIOPortzh ( P_ ioport /* :: IOPort a */ )
         StgMVarTSOQueue_tso(q)  = CurrentTSO;
 
         SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
-        //See Note [Heap memory barriers]
+        // See Note [Heap memory barriers]
         prim_write_barrier;
 
         StgMVar_head(ioport) = q;
diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c
index 82d9059f24bde4912d8260f4b589e420b2e779c2..7921041a5ab82b006e53ee8d5ff3dcd268fd9986 100644
--- a/rts/ProfHeap.c
+++ b/rts/ProfHeap.c
@@ -1228,7 +1228,6 @@ heapCensusBlock(Census *census, bdescr *bd)
         while (p < bd->free && !*p) p++;
         /* Note [skipping slop in the heap profiler]
          * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-         *
          * We make sure to zero slop that can remain after a major GC so
          * here we can assume any slop words we see until the block's free
          * pointer are zero. Since info pointers are always nonzero we can
diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c
index 39f39a22b4c9e45bfb798dcc1c0b2ea53c884ffe..b668b6a17809d269af355e19df69dc4c74263ca0 100644
--- a/rts/RaiseAsync.c
+++ b/rts/RaiseAsync.c
@@ -93,7 +93,7 @@ suspendComputation (Capability *cap, StgTSO *tso, StgUpdateFrame *stop_here)
    throwTo().
 
    Note [Throw to self when masked]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    When a StackOverflow occurs when the thread is masked, we want to
    defer the exception to when the thread becomes unmasked/hits an
    interruptible point.  We already have a mechanism for doing this,
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index c200dcde5d3876da60ffbe32929d0f5055df3c6a..29664831f854397de112d3cd799e071840836fea 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -552,7 +552,7 @@ usage_text[] = {
 
 /**
 Note [Windows Unicode Arguments]
-~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 On Windows argv is usually encoded in the current Codepage which might not
 support unicode.
 
@@ -2587,7 +2587,7 @@ void freeRtsArgs(void)
 
 /*
 Note [OPTION_SAFE vs OPTION_UNSAFE]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Ticket #3910 originally pointed out that the RTS options are a potential
 security problem. For example the -t -s or -S flags can be used to
 overwrite files. This would be bad in the context of CGI scripts or
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index b2c85b591c062ecd4c93ef724c6b0e886cb4a1ca..e186830b4e273777b99831e3d92bfb91bf20b91b 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -93,7 +93,6 @@ extern char **environ;
 /*
  * Note [Strong symbols]
  * ~~~~~~~~~~~~~~~~~~~~~
- *
  * The notion of a *weak* symbol is fairly common in linking: a symbol is weak
  * if it is declared but not defined, allowing it to be defined by an object
  * which is loaded later. GHC generalizes this notion, allowing symbol
@@ -112,7 +111,6 @@ extern char **environ;
 /*
  * Note [Symbols for MinGW's printf]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
  * The printf offered by Microsoft's libc implementation, msvcrt, is quite
  * incomplete, lacking support for even %ull. Consequently mingw-w64 offers its
  * own implementation which we enable. However, to be thread-safe the
@@ -132,7 +130,6 @@ extern char **environ;
  */
 /* Note [_iob_func symbol]
  * ~~~~~~~~~~~~~~~~~~~~~~~
- *
  * Microsoft in VS2013 to VS2015 transition made a backwards incompatible change
  * to the stdio function __iob_func.
  *
diff --git a/rts/Schedule.c b/rts/Schedule.c
index b9b15811c909507911ae04b72e7b80969d1eea08..fa48bef1a7f3e04222201d235e1e464fc313cdf2 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -224,6 +224,7 @@ schedule (Capability *initialCapability, Task *task)
     }
 
     // Note [shutdown]: The interruption / shutdown sequence.
+    // ~~~~~~~~~~~~~~~
     //
     // In order to cleanly shut down the runtime, we want to:
     //   * make sure that all main threads return to their callers
@@ -649,7 +650,7 @@ shouldYieldCapability (Capability *cap, Task *task, bool didGcLast)
     //     and this task it bound).
     //
     // Note [GC livelock]
-    //
+    // ~~~~~~~~~~~~~~~~~~
     // If we are interrupted to do a GC, then we do not immediately do
     // another one.  This avoids a starvation situation where one
     // Capability keeps forcing a GC and the other Capabilities make no
diff --git a/rts/StablePtr.c b/rts/StablePtr.c
index 8f860d480cbad664aada87cb1d42e8058829bcdd..ffd1d0775a4b4f5397d647aaeee6301a9cb261df 100644
--- a/rts/StablePtr.c
+++ b/rts/StablePtr.c
@@ -200,7 +200,7 @@ enlargeStablePtrTable(void)
 }
 
 /* Note [Enlarging the stable pointer table]
- *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  * To enlarge the stable pointer table, we allocate a new table, copy the
  * existing entries, and then store the old version of the table in old_SPTs
  * until we free it during GC.  By not immediately freeing the old version
diff --git a/rts/Stats.c b/rts/Stats.c
index 7abe2f7417142f855f8e8397f2f5d987eed43c40..ea57f60e70710ed15066c84cb8a2a2bcbb10d71c 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -764,8 +764,7 @@ StgInt TOTAL_CALLS=1;
 
 /*
 Note [RTS Stats Reporting]
-==========================
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 There are currently three reporting functions:
   * report_summary:
       Responsible for producing '+RTS -s' output.
@@ -948,7 +947,7 @@ static void report_summary(const RTSSummaryStats* sum)
                 sum->productivity_cpu_percent * 100,
                 sum->productivity_elapsed_percent * 100);
 
-    // See Note [Internal Counter Stats] for a description of the
+    // See Note [Internal Counters Stats] for a description of the
     // following counters. If you add a counter here, please remember
     // to update the Note.
     if (RtsFlags.MiscFlags.internalCounters) {
@@ -1474,7 +1473,7 @@ void stat_exit()
 }
 
 /* Note [Work Balance]
-----------------------
+~~~~~~~~~~~~~~~~~~~~~~
 Work balance is a measure of how evenly the work done during parallel garbage
 collection is spread across threads. To compute work balance we must take care
 to account for the number of GC threads changing between GCs. The statistics we
@@ -1553,7 +1552,7 @@ See #13830
 
 /*
 Note [Internal Counters Stats]
------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 What do the counts at the end of a '+RTS -s --internal-counters' report mean?
 They are detailed below. Most of these counters are used by multiple threads
 with no attempt at synchronisation. This means that reported values  may be
diff --git a/rts/StgCRun.c b/rts/StgCRun.c
index 8e536ad6c2419efcca19ea86a0e34dc04a0a86d4..7f5b6d169b0798c08203732674a6d7f7cc305ebc 100644
--- a/rts/StgCRun.c
+++ b/rts/StgCRun.c
@@ -113,7 +113,6 @@ StgFunPtr StgReturn(void)
 /*
  * Note [Stack Alignment on X86]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
  * On X86 (both 32bit and 64bit) we keep the stack aligned on function calls at
  * a 16-byte boundary. This is done because on a number of architectures the
  * ABI requires this (x64, Mac OSX 32bit/64bit) as well as interfacing with
@@ -149,7 +148,7 @@ StgFunPtr StgReturn(void)
  * for stg_stop_thread in StgStartup.cmm.
  *
  * Note [Windows Stack allocations]
- *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  * On windows the stack has to be allocated 4k at a time, otherwise
  * we get a segfault.  This is done by using a helper ___chkstk_ms that is
  * provided by libgcc.  The Haskell side already knows how to handle this
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index b78fca74cc1068e0522e40450afd2a7b7e84993b..e9186e350061abb7dfb8451abd2a8429c3129278 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -193,7 +193,7 @@ INFO_TABLE_RET( stg_ctoi_V, RET_BCO )
 }
 
 /*   Note [GHCi unboxed tuples stack spills]
-
+     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    In the calling convention for compiled code, a tuple is returned
    in registers, with everything that doesn't fit spilled onto the STG
    stack.
@@ -334,7 +334,7 @@ MK_STG_CTOI_T(62)
 
 /*
   Note [GHCi tuple layout]
-
+  ~~~~~~~~~~~~~~~~~~~~~~~~
   the tuple_info word describes the register and stack usage of the tuple:
 
   [ ssss ssss rrrr rrrr rrrr rrrr rrrr rrrr ]
@@ -900,8 +900,8 @@ INFO_TABLE( stg_COMPACT_NFDATA_DIRTY, 0, 9, COMPACT_NFDATA, "COMPACT_NFDATA", "C
 { foreign "C" barf("COMPACT_NFDATA_DIRTY object (%p) entered!", R1) never returns; }
 
 /* ----------------------------------------------------------------------------
-   Note [CHARLIKE and INTLIKE closures.]
-
+   Note [CHARLIKE and INTLIKE closures]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    These are static representations of Chars and small Ints, so that
    we can remove dynamic Chars and Ints during garbage collection and
    replace them with references to the static objects.
diff --git a/rts/StgStdThunks.cmm b/rts/StgStdThunks.cmm
index 5239496be53a83647d0efe213d91bb11ed0ee1b4..3c528f662fdb5a3b3002ccc6373743dbbaf0ca7f 100644
--- a/rts/StgStdThunks.cmm
+++ b/rts/StgStdThunks.cmm
@@ -53,7 +53,9 @@
  * because LDV profiling relies on entering closures to mark them as
  * "used".
  *
- * Note [untag for prof]: when we enter a closure, the convention is
+ * Note [untag for prof]
+ * ~~~~~~~~~~~~~~~~~~~~~
+ * When we enter a closure, the convention is
  * that the closure pointer passed in the first argument is
  * *untagged*.  Without profiling we don't have to worry about this,
  * because we never enter a tagged pointer.
diff --git a/rts/Task.h b/rts/Task.h
index 9b6a8e8d7b5d30c94a38614fc3ae8567e5c654fd..fd7b68aecf122b4bdb17aeac58d27d456f9f0d74 100644
--- a/rts/Task.h
+++ b/rts/Task.h
@@ -18,7 +18,6 @@
 /*
    Note [Definition of a Task]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
    A task is an OSThread that runs Haskell code.  Every OSThread that
    runs inside the RTS, whether as a worker created by the RTS or via
    an in-call from C to Haskell, has an associated Task.  The first
@@ -35,7 +34,6 @@
 
    Note [Ownership of Task]
    ~~~~~~~~~~~~~~~~~~~~~~~~
-
    Task ownership is a little tricky.  The default situation is that
    the Task is an OS-thread-local structure that is owned by the OS
    thread named in task->id.  An OS thread not currently executing
diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c
index c94b95afabba8d45037c0c1e2001831b70a899d9..ffa1168a279d5abcd1bee0206c44fee8592cba43 100644
--- a/rts/ThreadPaused.c
+++ b/rts/ThreadPaused.c
@@ -252,7 +252,7 @@ threadPaused(Capability *cap, StgTSO *tso)
         retry:
 #endif
             // Note [suspend duplicate work]
-            //
+            // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
             // If the info table is a WHITEHOLE or a BLACKHOLE, then
             // another thread has claimed it (via the SET_INFO()
             // below), or is in the process of doing so.  In that case
diff --git a/rts/Threads.c b/rts/Threads.c
index ab7af2e52ca366e521fbc8a7174d5e2f2b9dd7f1..1972f14895b5401fa65b3a19f6e1270a12ce0841 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -570,7 +570,7 @@ threadStackOverflow (Capability *cap, StgTSO *tso)
                                  stg_min(tso->stackobj->stack + tso->stackobj->stack_size,
                                          tso->stackobj->sp+64)));
 
-        // Note [Throw to self when masked], also #767 and #8303.
+        // See Note [Throw to self when masked], also #767 and #8303.
         throwToSelf(cap, tso, (StgClosure *)stackOverflow_closure);
         return;
     }
diff --git a/rts/Timer.c b/rts/Timer.c
index ec3dff0a5c52901d14bd79d7c03727e87430b21e..e6666856a6c909d064fc764bfb2634a0f782e869 100644
--- a/rts/Timer.c
+++ b/rts/Timer.c
@@ -44,8 +44,7 @@ static int ticks_to_eventlog_flush = 0;
 
 /*
  Note [GC During Idle Time]
- --------------------------
-
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~
  In the threaded RTS, a major GC can be performed during idle time (i.e., when
  no Haskell computations are ready to run).  This can be beneficial for two
  reasons.  First, running the GC during idle time makes it less likely that a GC
diff --git a/rts/TraverseHeap.h b/rts/TraverseHeap.h
index 0bc553e094dbcc1b578102e67b1ade132ab1ea65..2ac20e9cc541d15f12f5adc5e60606d3529e1409 100644
--- a/rts/TraverseHeap.h
+++ b/rts/TraverseHeap.h
@@ -97,7 +97,6 @@ typedef struct stackElement_ {
 typedef struct traverseState_ {
     /** Note [Profiling heap traversal visited bit]
      * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-     *
      * If the RTS is compiled with profiling enabled StgProfHeader can be used
      * by profiling code to store per-heap object information. Specifically the
      * 'hp_hdr' field is used to store heap profiling information.
diff --git a/rts/Updates.cmm b/rts/Updates.cmm
index d459607752153e2ed12cc6eab5f410322041631a..57839216c4185ad1f38bc34f293d907107da118d 100644
--- a/rts/Updates.cmm
+++ b/rts/Updates.cmm
@@ -91,7 +91,7 @@ INFO_TABLE_RET ( stg_bh_upd_frame, UPDATE_FRAME,
 }
 
 /* Note [HpAlloc]
- *
+ * ~~~~~~~~~~~~~~
  * HpAlloc is required to be zero unless we just bumped Hp and failed
  * the heap check: see HeapStackCheck.cmm.  Failures that result from
  * HpAlloc being non-zero are very hard to track down, because they
diff --git a/rts/include/Stg.h b/rts/include/Stg.h
index 156c3a283cf14cf39570551a8479966fc7e42b13..be0995445bc404016296f15375d86282b4a6b493 100644
--- a/rts/include/Stg.h
+++ b/rts/include/Stg.h
@@ -268,8 +268,10 @@ typedef StgFunPtr       F_;
 /* foreign functions: */
 #define EFF_(f)   void f() /* See Note [External function prototypes] */
 
-/* Note [External function prototypes]  See #8965, #11395
+/* Note [External function prototypes]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+(see #8965, #11395)
+
 In generated C code we need to distinct between two types
 of external symbols:
 1.  Cmm functions declared by 'EF_' macro (External Functions)
diff --git a/rts/include/rts/Flags.h b/rts/include/rts/Flags.h
index 2936876b7a255761029e782fb2cafa3a3336143f..eb38e8079462e10b6b4ab5c344482ea9249126ae 100644
--- a/rts/include/rts/Flags.h
+++ b/rts/include/rts/Flags.h
@@ -22,7 +22,7 @@
 /* For defaults, see the @initRtsFlagsDefaults@ routine. */
 
 /* Note [Synchronization of flags and base APIs]
- *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  * We provide accessors to RTS flags in base. (GHC.RTS module)
  * The API should be updated whenever RTS flags are modified.
  */
@@ -230,7 +230,7 @@ typedef struct _MISC_FLAGS {
                                           memory management for non-GC related
                                           tasks in the future, we'd respect it
                                           there as well. */
-    bool internalCounters;       /* See Note [Internal Counter Stats] */
+    bool internalCounters;       /* See Note [Internal Counters Stats] */
     bool linkerAlwaysPic;        /* Assume the object code is always PIC */
     StgWord linkerMemBase;       /* address to ask the OS for memory
                                   * for the linker, NULL ==> off */
diff --git a/rts/include/rts/Libdw.h b/rts/include/rts/Libdw.h
index d7bd55d06edde612559febf999aa3af14519c687..7076611dfd3593c6dc8fc76c8e3de7de032752e5 100644
--- a/rts/include/rts/Libdw.h
+++ b/rts/include/rts/Libdw.h
@@ -18,8 +18,8 @@
 /*
  * Note [Chunked stack representation]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
  * Consider the stack,
+ *
  *     main                   calls                        (bottom of stack)
  *       func1                which in turn calls
  *         func2              which calls
diff --git a/rts/include/rts/OSThreads.h b/rts/include/rts/OSThreads.h
index d24a1313a6ca2c82106cebb0d4741b1cc5bae195..77241341b7d936dda5eb660a52ba02fb29c58904 100644
--- a/rts/include/rts/OSThreads.h
+++ b/rts/include/rts/OSThreads.h
@@ -114,6 +114,7 @@ typedef DWORD ThreadLocalKey;
 #define INIT_COND_VAR  0
 
 /* Note [SRW locks]
+   ~~~~~~~~~~~~~~~~
    We have a choice for implementing Mutexes on Windows.  Standard
    Mutexes are kernel objects that require kernel calls to
    acquire/release, whereas CriticalSections are spin-locks that block
diff --git a/rts/include/rts/prof/CCS.h b/rts/include/rts/prof/CCS.h
index 7685f03003a58c308029b49ad67329ed307e7624..a155e1385ba2a12cca4e2b54a3eba4f2379ec401 100644
--- a/rts/include/rts/prof/CCS.h
+++ b/rts/include/rts/prof/CCS.h
@@ -18,6 +18,7 @@
  * ---------------------------------------------------------------------------*/
 /*
  * Note [struct alignment]
+ * ~~~~~~~~~~~~~~~~~~~~~~~
  * NB. be careful to avoid unwanted padding between fields, by
  * putting the 8-byte fields on an 8-byte boundary.  Padding can
  * vary between C compilers, and we don't take into account any
diff --git a/rts/include/rts/storage/Block.h b/rts/include/rts/storage/Block.h
index 730947e375bc4417d2557ecfc150ef24048fc863..141ec777be551cf3bfed2c895ff84912e394b804 100644
--- a/rts/include/rts/storage/Block.h
+++ b/rts/include/rts/storage/Block.h
@@ -28,7 +28,7 @@
 #define BLOCK_SIZE   (1<<BLOCK_SHIFT)
 #else
 #define BLOCK_SIZE   (UNIT<<BLOCK_SHIFT)
-// Note [integer overflow]
+// See Note [integer overflow]
 #endif
 
 #define BLOCK_SIZE_W (BLOCK_SIZE/sizeof(W_))
@@ -43,7 +43,7 @@
 #define MBLOCK_SIZE    (1<<MBLOCK_SHIFT)
 #else
 #define MBLOCK_SIZE    (UNIT<<MBLOCK_SHIFT)
-// Note [integer overflow]
+// See Note [integer overflow]
 #endif
 
 #define MBLOCK_SIZE_W  (MBLOCK_SIZE/sizeof(W_))
@@ -60,7 +60,7 @@
 
 /*
  * Note [integer overflow]
- *
+ * ~~~~~~~~~~~~~~~~~~~~~~~
  * The UL suffix in BLOCK_SIZE and MBLOCK_SIZE promotes the expression
  * to an unsigned long, which means that expressions involving these
  * will be promoted to unsigned long, which makes integer overflow
diff --git a/rts/include/rts/storage/ClosureMacros.h b/rts/include/rts/storage/ClosureMacros.h
index b841ef8be06c46fcd14895c52dc75a7be4e15a60..393bee3a9f981747067423357c45e98dba35968a 100644
--- a/rts/include/rts/storage/ClosureMacros.h
+++ b/rts/include/rts/storage/ClosureMacros.h
@@ -480,10 +480,11 @@ INLINE_HEADER StgWord8 *mutArrPtrsCard (StgMutArrPtrs *a, W_ n)
    Replacing a closure with a different one.  We must call
    OVERWRITING_CLOSURE(p) on the old closure that is about to be
    overwritten.
+ */
 
+/*
    Note [zeroing slop when overwriting closures]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
    When we overwrite a closure in the heap with a smaller one, in some scenarios
    we need to write zero words into "slop"; the memory that is left
    unoccupied. See Note [slop on the heap]
diff --git a/rts/include/rts/storage/Closures.h b/rts/include/rts/storage/Closures.h
index b28315f76a7574d33f54ebe7f65719b375e0c53a..44188f33944e08b140cb6dae28ffffecab4e74c2 100644
--- a/rts/include/rts/storage/Closures.h
+++ b/rts/include/rts/storage/Closures.h
@@ -491,6 +491,6 @@ typedef struct StgCompactNFData_ {
       // Used temporarily to store the result of compaction.  Doesn't need to be
       // a GC root.
     struct StgCompactNFData_ *link;
-      // Used by compacting GC for linking CNFs with threaded hash tables. See
-      // Note [CNFs in compacting GC] in Compact.c for details.
+      // Used by compacting GC for linking CNFs with threaded hash tables.
+      // See Note [CNFs in compacting GC] in Compact.c for details.
 } StgCompactNFData;
diff --git a/rts/include/rts/storage/InfoTables.h b/rts/include/rts/storage/InfoTables.h
index 55d9ad6542f1b8f89d00cc9395f91bbb5af151d5..55aba6b4d7d9d7e8062588041ec8e5eee27b3a5b 100644
--- a/rts/include/rts/storage/InfoTables.h
+++ b/rts/include/rts/storage/InfoTables.h
@@ -235,7 +235,6 @@ typedef struct StgInfoTable_ {
 /*
  * Note [Encoding static reference tables]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
  * As static reference tables appear frequently in code, we use a special
  * compact encoding for the common case of a module defining only a few CAFs: We
  * produce one table containing a list of CAFs in the module and then include a
diff --git a/rts/include/rts/storage/TSO.h b/rts/include/rts/storage/TSO.h
index 874d61ab604826b6ffe32394c58dc4a90028f48b..d21cd7a6454653d8f2a8c9029770dde3d655aa8e 100644
--- a/rts/include/rts/storage/TSO.h
+++ b/rts/include/rts/storage/TSO.h
@@ -191,10 +191,9 @@ typedef struct StgTSO_ {
 
 /* Note [StgStack dirtiness flags and concurrent marking]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * Without concurrent collection by the nonmoving collector the stack dirtiness story
- * is quite simple: The stack is either STACK_DIRTY (meaning it has been added to mut_list)
- * or not.
+ * Without concurrent collection by the nonmoving collector the stack dirtiness
+ * story is quite simple: The stack is either STACK_DIRTY (meaning it has been
+ * added to mut_list) or not.
  *
  * However, things are considerably more complicated with concurrent collection
  * (namely, when nonmoving_write_barrier_enabled is set): In addition to adding
diff --git a/rts/include/stg/SMP.h b/rts/include/stg/SMP.h
index a1a714f4c97e60b29e5f8a1473bf51831acbafe2..f672009c76a9f8c5f4d6eb6864d6f295fd0b790b 100644
--- a/rts/include/stg/SMP.h
+++ b/rts/include/stg/SMP.h
@@ -107,7 +107,6 @@ EXTERN_INLINE void load_load_barrier(void);
 /*
  * Note [Heap memory barriers]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
  * Machines with weak memory ordering semantics have consequences for how
  * closures are observed and mutated. For example, consider a thunk that needs
  * to be updated to an indirection. In order for the indirection to be safe for
diff --git a/rts/linker/Elf.c b/rts/linker/Elf.c
index c5d009639e97cad63fe7b21071aac2709d54ef88..76145dbbf4bfaa90db2e3c673ad29a0afbbc073c 100644
--- a/rts/linker/Elf.c
+++ b/rts/linker/Elf.c
@@ -110,9 +110,8 @@
 #endif
 
 /*
-
    Note [Many ELF Sections]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~
    The normal section number fields in ELF are limited to 16 bits, which runs
    out of bits when you try to cram in more sections than that.
 
@@ -1245,6 +1244,7 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
 
            if(needs_veneer) { /* overflow or thum interworking */
                // Note [PC bias]
+               // ~~~~~~~~~~~~~~
                // From the ELF for the ARM Architecture documentation:
                // > 4.6.1.1 Addends and PC-bias compensation
                // > A binary file may use REL or RELA relocations or a mixture
diff --git a/rts/linker/LoadArchive.c b/rts/linker/LoadArchive.c
index 041ebef4b61f9d454e713a70676c365d7f85eaf2..ff8630d57e79618fa01232b02f3d81eb671f0f37 100644
--- a/rts/linker/LoadArchive.c
+++ b/rts/linker/LoadArchive.c
@@ -468,6 +468,7 @@ static HsInt loadArchive_ (pathchar *path)
 #if defined(OBJFORMAT_PEi386)
         /*
         * Note [MSVC import files (ext .lib)]
+        * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         * MSVC compilers store the object files in
         * the import libraries with extension .dll
         * so on Windows we should look for those too.
diff --git a/rts/linker/M32Alloc.c b/rts/linker/M32Alloc.c
index cd8751b3b0442215705a5eff852951027082eaa0..69613d8d7c77eb0f892bdc4cde1d3e35b65bf4b1 100644
--- a/rts/linker/M32Alloc.c
+++ b/rts/linker/M32Alloc.c
@@ -18,10 +18,8 @@
 #include <stdio.h>
 
 /*
-
 Note [Compile Time Trickery]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 This file implements two versions of each of the `m32_*` functions. At the top
 of the file there is the real implementation (compiled in when
 `NEED_M32` is true) and a dummy implementation that exists only to
@@ -45,10 +43,8 @@ still check the call for syntax and correct function parameter types.
 #if defined(NEED_M32)
 
 /*
-
 Note [M32 Allocator]
 ~~~~~~~~~~~~~~~~~~~~
-
 A memory allocator that allocates only pages in the 32-bit range (lower 2GB).
 This is useful on 64-bit platforms to ensure that addresses of allocated
 objects can be referenced with a 32-bit relative offset.
diff --git a/rts/linker/PEi386.c b/rts/linker/PEi386.c
index f186da0af848834debe655e9581f0150252bed21..011e47a21b20874ca443acf3e1ba82ce3c403f08 100644
--- a/rts/linker/PEi386.c
+++ b/rts/linker/PEi386.c
@@ -59,7 +59,6 @@
 
    Note [BFD import library]
    ~~~~~~~~~~~~~~~~~~~~~~~~~
-
    On Windows, compilers don't link directly to dynamic libraries.
    The reason for this is that the exports are not always by symbol, the
    Import Address Table (IAT) also allows exports by ordinal number
@@ -128,7 +127,6 @@
 
    Note [Memory allocation]
    ~~~~~~~~~~~~~~~~~~~~~~~~
-
    Previously on Windows we would use VirtualAlloc to allocate enough space for
    loading the entire object file into memory and keep it there for the duration
    until the entire object file has been unloaded.
@@ -166,7 +164,6 @@
 
    Note [Section alignment]
    ~~~~~~~~~~~~~~~~~~~~~~~~
-
    The Windows linker aligns memory to it's section alignment requirement by
    aligning it during the copying to the private heap. We also ensure that the
    trampoline "region" we reserve is 8 bytes aligned.
@@ -1996,7 +1993,7 @@ ocResolve_PEi386 ( ObjectCode* oc )
 
 /*
   Note [ELF constant in PE file]
-
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   For some reason, the PE files produced by GHC contain a linux
   relocation constant 17 (0x11) in the object files. As far as I (Phyx-) can tell
   this constant doesn't seem like it's coming from GHC, or at least I could not find
diff --git a/rts/linker/PEi386.h b/rts/linker/PEi386.h
index 4c33dfd4d9c41d87f404f577d20c982efeee70d2..8e6e844efb083c5bc2c5b5bceacb02e929b978b9 100644
--- a/rts/linker/PEi386.h
+++ b/rts/linker/PEi386.h
@@ -158,7 +158,7 @@ uint8_t* getSymShortName ( COFF_HEADER_INFO *info, COFF_symbol* sym );
 
 /*
 Note [mingw-w64 name decoration scheme]
-
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 What's going on with name decoration? Well, original code
 have some crufty and ad-hocish paths related mostly to very old
 mingw gcc/binutils/runtime combinations. Now mingw-w64 offers pretty
diff --git a/rts/linker/elf_plt_arm.c b/rts/linker/elf_plt_arm.c
index bd21243ec4a0c2fa0cd5636be223269907f6dfb8..5b67bf8ac45fd0ff9e34f17aafdcf1e964e0046f 100644
--- a/rts/linker/elf_plt_arm.c
+++ b/rts/linker/elf_plt_arm.c
@@ -58,7 +58,6 @@ bool makeStubArmThm(Stub * s);
 /*
   Note [The ARM/Thumb Story]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~
-
   Support for the ARM architecture is complicated by the fact that ARM has not
   one but several instruction encodings. The two relevant ones here are the
   original ARM encoding and Thumb, a more dense variant of ARM supporting only
diff --git a/rts/linker/elf_reloc_aarch64.c b/rts/linker/elf_reloc_aarch64.c
index d8c4f8b72482b3d20e2d249a6041684a5f587e13..790378ab0e1024b8dc5dc8a961f1791c39feefc2 100644
--- a/rts/linker/elf_reloc_aarch64.c
+++ b/rts/linker/elf_reloc_aarch64.c
@@ -229,6 +229,7 @@ computeAddend(Section * section, Elf_Rel * rel,
             /* note: we are encoding bits [27:2] */
             if(!isInt64(26+2, V)) {
                 // Note [PC bias aarch64]
+                // ~~~~~~~~~~~~~~~~~~~~~~
                 // There is no PC bias to accommodate in the
                 // relocation of a place containing an instruction
                 // that formulates a PC-relative address. The program
diff --git a/rts/linker/elf_tlsgd.c b/rts/linker/elf_tlsgd.c
index ec42e29ac67aeac3142f6e54115422e63525286f..a22ed0b731de5fec29350d31cf6b367db13e9e02 100644
--- a/rts/linker/elf_tlsgd.c
+++ b/rts/linker/elf_tlsgd.c
@@ -4,7 +4,7 @@
 
 /*
  * Note [TLSGD relocation]
- *
+ * ~~~~~~~~~~~~~~~~~~~~~~~
  * Quick background: FreeBSD's <ctype.h> is poisoned with static inline code
  * that gets compiled into every program that uses functions like isdigit(3).
  * When compiled "-c -fpic" for inclusion in position-independent ".a" files
diff --git a/rts/posix/OSMem.c b/rts/posix/OSMem.c
index fff2f1e590ab77fd57aff0af621ecbedb82b1eac..822546d5d1eabad7eb7f2139e6fdfd20b14bf868 100644
--- a/rts/posix/OSMem.c
+++ b/rts/posix/OSMem.c
@@ -652,7 +652,7 @@ void osCommitMemory(void *at, W_ size)
 }
 
 /* Note [MADV_FREE and MADV_DONTNEED]
- *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  * madvise() provides flags with which one can release no longer needed pages
  * back to the kernel without having to munmap() (which is expensive).
  *
diff --git a/rts/sm/CNF.c b/rts/sm/CNF.c
index a6bd3b69f09c85d9c78b3e6e542dce267eebae24..1f40402c63471e24f2501b57c07a4ae6417fd1bd 100644
--- a/rts/sm/CNF.c
+++ b/rts/sm/CNF.c
@@ -36,7 +36,6 @@
 /*
   Note [Compact Normal Forms]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
   A compact normal form (CNF) is a region of memory containing one or more
   Haskell data structures.  The goals are:
 
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index 0e0e887b1e675eddaa442106aa37b87e70947f46..834df459b40dd50c361b71aa6aaa7095d7f84b06 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -43,7 +43,6 @@
 
 /* Note [Selector optimisation depth limit]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
  * MAX_THUNK_SELECTOR_DEPTH is used to avoid long recursion of
  * eval_thunk_selector due to nested selector thunks. Note that this *only*
  * counts nested selector thunks, e.g. `fst (fst (... (fst x)))`. The collector
@@ -174,7 +173,6 @@ alloc_for_copy (uint32_t size, uint32_t gen_no)
 /*
  * Note [Non-moving GC: Marking evacuated objects]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
  * When the non-moving collector is in use we must be careful to ensure that any
  * references to objects in the non-moving generation from younger generations
  * are pushed to the mark queue.
@@ -695,7 +693,7 @@ loop:
   if (!HEAP_ALLOCED_GC(q)) {
       if (!major_gc) return;
 
-      // Note [Object unloading] in CheckUnload.c
+      // See Note [Object unloading] in CheckUnload.c
       if (RTS_UNLIKELY(unload_mark_needed)) {
           markObjectCode(q);
       }
@@ -933,7 +931,7 @@ loop:
               return;
           }
           // Note [BLACKHOLE pointing to IND]
-          //
+          // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           // BLOCKING_QUEUE can be overwritten by IND (see
           // wakeBlockingQueue()). However, when this happens we must
           // be updating the BLACKHOLE, so the BLACKHOLE's indirectee
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 64d0924059d70239a67f720ebf13062daeefa8ac..15aef3a9fc54c908aa9bfab8808693f3947ae2e5 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -158,6 +158,7 @@ StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)]
 #endif // THREADED_RTS
 
 /* Note [n_gc_threads]
+   ~~~~~~~~~~~~~~~~~~~
 This is a global variable that originally tracked the number of threads
 participating in the current gc. It's meaning has diverged from this somewhat,
 as it does not distinguish betweeen idle and non-idle threads. An idle thread
@@ -2197,7 +2198,7 @@ bool doIdleGCWork(Capability *cap STG_UNUSED, bool all)
 
 
 /* Note [Synchronising work stealing]
- *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  * During parallel garbage collections, idle gc threads will steal work from
  * other threads. If they see no work to steal then they will wait on a
  * condition variabl(gc_running_cv).
@@ -2243,6 +2244,7 @@ bool doIdleGCWork(Capability *cap STG_UNUSED, bool all)
  * */
 
 /* Note [Scaling retained memory]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  * Tickets: #19381 #19359 #14702
  *
  * After a spike in memory usage we have been conservative about returning
diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c
index 627c95fb42a9accfc6a149803e3940cbefa7f554..9d57bf7d9e048e805c079a1b8516d700a8d7648d 100644
--- a/rts/sm/GCUtils.c
+++ b/rts/sm/GCUtils.c
@@ -203,7 +203,7 @@ push_todo_block(bdescr *bd, gen_workspace *ws)
 }
 
 /* Note [big objects]
-
+   ~~~~~~~~~~~~~~~~~~
    We can get an ordinary object (CONSTR, FUN, THUNK etc.) that is
    larger than a block (see #7919).  Let's call these "big objects".
    These objects don't behave like large objects - they live in
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
index dd019ec18b14ba558f6126a40183cb4836ae4242..a918f422cf969a6a5e61de150e76e87faa1cd901 100644
--- a/rts/sm/NonMoving.c
+++ b/rts/sm/NonMoving.c
@@ -229,7 +229,7 @@ Mutex concurrent_coll_finished_lock;
  *  - Note [StgStack dirtiness flags and concurrent marking] (TSO.h) describes
  *    the protocol for concurrent marking of stacks.
  *
- *  - Note [Nonmoving write barrier in Perform{Take,Put}] (PrimOps.cmm) describes
+ *  - Note [Nonmoving write barrier in Perform{Put,Take}] (PrimOps.cmm) describes
  *    a tricky barrier necessary when resuming threads blocked on MVar
  *    operations.
  *
@@ -328,8 +328,8 @@ Mutex concurrent_coll_finished_lock;
  * The implementation details of this are described in Note [Non-moving GC:
  * Marking evacuated objects] in Evac.c.
  *
- * Note [Deadlock detection under the non-moving collector]
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * Note [Deadlock detection under nonmoving collector]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  * In GHC the garbage collector is responsible for identifying deadlocked
  * programs. Providing for this responsibility is slightly tricky in the
  * non-moving collector due to the existence of aging. In particular, the
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index 2fd85dc4f0bdf09fca4f7293cb37aa326d998b74..87b8f774bdfe3d9dd8ba2101b468a36d1e74c1e6 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -159,7 +159,6 @@ StgIndStatic *debug_caf_list_snapshot = (StgIndStatic*)END_OF_CAF_LIST;
  *
  * Note [Eager update remembered set flushing]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
  * We eagerly flush update remembered sets during minor GCs to avoid scenarios
  * like the following which could result in long sync pauses:
  *
@@ -199,7 +198,6 @@ StgIndStatic *debug_caf_list_snapshot = (StgIndStatic*)END_OF_CAF_LIST;
  *
  * Note [Concurrent read barrier on deRefWeak#]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
  * In general the non-moving GC assumes that all pointers reachable from a
  * marked object are themselves marked (or in the mark queue). However,
  * weak pointers are an obvious exception to this rule. In particular,
@@ -596,7 +594,7 @@ inline void updateRemembSetPushThunk(Capability *cap, StgThunk *thunk)
  * we update the indirectee to ensure that the thunk's free variables remain
  * visible to the concurrent collector.
  *
- * See Note [Update rememembered set].
+ * See Note [Update remembered set].
  */
 void updateRemembSetPushThunkEager(Capability *cap,
                                    const StgThunkInfoTable *info,
diff --git a/rts/sm/NonMovingScav.c b/rts/sm/NonMovingScav.c
index 4fcbc5881c16f09efaf96595157b4e430dbb67d4..56ebe5ffe4723e83202866481b8dd39443aad358 100644
--- a/rts/sm/NonMovingScav.c
+++ b/rts/sm/NonMovingScav.c
@@ -32,7 +32,7 @@ nonmovingScavengeOne (StgClosure *q)
         if (gct->failed_to_evac) {
             mvar->header.info = &stg_MVAR_DIRTY_info;
 
-            // Note [Dirty flags in the non-moving collector] in NonMoving.c
+            // See Note [Dirty flags in the non-moving collector] in NonMoving.c
             markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, (StgClosure *) mvar->head);
             markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, (StgClosure *) mvar->tail);
             markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, (StgClosure *) mvar->value);
@@ -52,7 +52,7 @@ nonmovingScavengeOne (StgClosure *q)
         if (gct->failed_to_evac) {
             tvar->header.info = &stg_TVAR_DIRTY_info;
 
-            // Note [Dirty flags in the non-moving collector] in NonMoving.c
+            // See Note [Dirty flags in the non-moving collector] in NonMoving.c
             markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, (StgClosure *) tvar->current_value);
             markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, (StgClosure *) tvar->first_watch_queue_entry);
         } else {
@@ -177,7 +177,7 @@ nonmovingScavengeOne (StgClosure *q)
         if (gct->failed_to_evac) {
             ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
 
-            // Note [Dirty flags in the non-moving collector] in NonMoving.c
+            // See Note [Dirty flags in the non-moving collector] in NonMoving.c
             markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, (StgClosure *) mv->var);
         } else {
             ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info;
diff --git a/rts/sm/NonMovingSweep.c b/rts/sm/NonMovingSweep.c
index 1a7c97b7e68ca4b56a8a7d1b717cf97c935b5ef5..5c4752d4a3025fa6be4ad9946adb19d18506ac65 100644
--- a/rts/sm/NonMovingSweep.c
+++ b/rts/sm/NonMovingSweep.c
@@ -370,7 +370,6 @@ void nonmovingSweepStableNameTable()
 
     /* Note [Sweeping stable names in the concurrent collector]
      * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-     *
      * When collecting concurrently we need to take care to avoid freeing
      * stable names the we didn't sweep this collection cycle. For instance,
      * consider the following situation:
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index cf4e2dfea698457f8b2c976434a757cf01181182..9c2ccc2c41ed867d676f837f3d1498fec3c3a112 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -909,7 +909,6 @@ static void checkGeneration (generation *gen,
 #if defined(THREADED_RTS)
     // Note [heap sanity checking with SMP]
     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    //
     // heap sanity checking doesn't work with SMP for two reasons:
     //
     //   * We can't zero the slop. However, we can sanity-check the heap after a
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index a36ebbb331f35a27bca17fe1bbcd13e39867ec3d..b121c010ca1e8d7674d94bff98a2722b8b620da6 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -1858,7 +1858,7 @@ scavenge_stack(StgPtr p, StgPtr stack_end)
 
     case UPDATE_FRAME:
         // Note [upd-black-hole]
-        //
+        // ~~~~~~~~~~~~~~~~~~~~~
         // In SMP, we can get update frames that point to indirections
         // when two threads evaluate the same thunk.  We do attempt to
         // discover this situation in threadPaused(), but it's
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index ede47d3eb27851044f89ee1f9d96f9744e629b78..c592595737cb28f11a0151aba602c1eed023d3b0 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -399,7 +399,6 @@ void listAllBlocks (ListBlocksCb cb, void *user)
 /* -----------------------------------------------------------------------------
    Note [CAF management]
    ~~~~~~~~~~~~~~~~~~~~~
-
    The entry code for every CAF does the following:
 
       - calls newCAF, which builds a CAF_BLACKHOLE on the heap and atomically
@@ -434,7 +433,6 @@ void listAllBlocks (ListBlocksCb cb, void *user)
    ------------------
    Note [atomic CAF entry]
    ~~~~~~~~~~~~~~~~~~~~~~~
-
    With THREADED_RTS, newCAF() is required to be atomic (see
    #5558). This is because if two threads happened to enter the same
    CAF simultaneously, they would create two distinct CAF_BLACKHOLEs,
@@ -448,7 +446,6 @@ void listAllBlocks (ListBlocksCb cb, void *user)
    ------------------
    Note [GHCi CAFs]
    ~~~~~~~~~~~~~~~~
-
    For GHCI, we have additional requirements when dealing with CAFs:
 
       - we must *retain* all dynamically-loaded CAFs ever entered,
@@ -470,7 +467,6 @@ void listAllBlocks (ListBlocksCb cb, void *user)
    ------------------
    Note [Static objects under the nonmoving collector]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
    Static object management is a bit tricky under the nonmoving collector as we
    need to maintain a bit more state than in the moving collector. In
    particular, the moving collector uses the low bits of the STATIC_LINK field
@@ -597,6 +593,7 @@ newCAF(StgRegTable *reg, StgIndStatic *caf)
     if(keepCAFs && !(highMemDynamic && (void*) caf > (void*) 0x80000000))
     {
         // Note [dyn_caf_list]
+        // ~~~~~~~~~~~~~~~~~~~
         // If we are in GHCi _and_ we are using dynamic libraries,
         // then we can't redirect newCAF calls to newRetainedCAF (see below),
         // so we make newCAF behave almost like newRetainedCAF.
@@ -990,7 +987,6 @@ accountAllocation(Capability *cap, W_ n)
 
 /* Note [slop on the heap]
  * ~~~~~~~~~~~~~~~~~~~~~~~
- *
  * We use the term "slop" to refer to allocated memory on the heap which isn't
  * occupied by any closure. Usually closures are packet tightly into the heap
  * blocks, storage for one immediately following another. However there are
@@ -1549,7 +1545,7 @@ dirty_MVAR(StgRegTable *reg, StgClosure *p, StgClosure *old_val)
 
 /* -----------------------------------------------------------------------------
  * Note [allocation accounting]
- *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *   - When cap->r.rCurrentNusery moves to a new block in the nursery,
  *     we add the size of the used portion of the previous block to
  *     cap->total_allocated. (see finishedNurseryBlock())
@@ -1825,7 +1821,6 @@ _bdescr (StgPtr p)
 /*
 Note [Sources of Block Level Fragmentation]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 Block level fragmentation is when there is unused space in megablocks.
 The amount of fragmentation can be calculated as the difference between the
 total size of allocated blocks and the total size of allocated megablocks.
diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h
index 48ddcf35f51ba6cf50db9fd8d89330e29921744d..00f2943a51b53a58bf0fa0a3d14aadbd7b23aa0a 100644
--- a/rts/sm/Storage.h
+++ b/rts/sm/Storage.h
@@ -82,7 +82,7 @@ bool doYouWantToGC(Capability *cap)
 /* -----------------------------------------------------------------------------
    Allocation accounting
 
-   See [Note allocation accounting] in Storage.c
+   See Note [allocation accounting] in Storage.c
    -------------------------------------------------------------------------- */
 
 //
@@ -126,7 +126,7 @@ void move_STACK (StgStack *src, StgStack *dest);
 
 /* -----------------------------------------------------------------------------
    Note [STATIC_LINK fields]
-
+   ~~~~~~~~~~~~~~~~~~~~~~~~~
    The low 2 bits of the static link field have the following meaning:
 
    00     we haven't seen this static object before
@@ -175,7 +175,7 @@ extern uint32_t prev_static_flag, static_flag;
 
 /* -----------------------------------------------------------------------------
    Note [CAF lists]
-
+   ~~~~~~~~~~~~~~~~
    dyn_caf_list  (CAFs chained through static_link)
       This is a chain of all CAFs in the program, used for
       dynamically-linked GHCi.
diff --git a/rts/win32/OSMem.c b/rts/win32/OSMem.c
index dde1a74bbb3370040b38b2969e5238a4e3deb65b..c192fb5923e7e1b21b6d64bb56a9c95af101d08b 100644
--- a/rts/win32/OSMem.c
+++ b/rts/win32/OSMem.c
@@ -547,6 +547,7 @@ void osBindMBlocksToNode(
         void* temp;
         if (RtsFlags.GcFlags.numa) {
             /* Note [base memory]
+               ~~~~~~~~~~~~~~~~~~
                I would like to use addr here to specify the base
                memory of allocation. The problem is that the address
                we are requesting is too high. I can't figure out if it's
diff --git a/rules/build-package-way.mk b/rules/build-package-way.mk
index 5ac137635e50a8d9f3e1998b9224724f969afd56..2f7af28ecb5a89d26ff865e4137afea272190f47 100644
--- a/rules/build-package-way.mk
+++ b/rules/build-package-way.mk
@@ -28,7 +28,7 @@ $1_$2_$3_LIB = $1/$2/build/$$($1_$2_$3_LIB_FILE)
 $$($1_$2_COMPONENT_ID)_$2_$3_LIB = $$($1_$2_$3_LIB)
 
 # Note [inconsistent distdirs]
-#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # hack: the DEPS_LIBS mechanism assumes that the distdirs for packages
 # that depend on each other are the same, but that is not the case for
 # ghc where we use stage1/stage2 rather than dist/dist-install.
diff --git a/rules/build-prog.mk b/rules/build-prog.mk
index e7764d75a1e3b9b13b9839a3410d25f67dc29f09..8d2bcd25c50d4b5ba1fe03c25cc5560e1c01e74f 100644
--- a/rules/build-prog.mk
+++ b/rules/build-prog.mk
@@ -277,7 +277,9 @@ $1/$2/build/tmp/$$($1_$2_PROG) : $$($1_$2_$$($1_$2_PROGRAM_WAY)_HS_OBJS) $$($1_$
 endif
 endif # $1_$2_PROG_NEEDS_C_WRAPPER
 
-# Note [lib-depends] if this program is built with stage1 or greater, we
+# Note [lib-depends]
+# ~~~~~~~~~~~~~~~~~~
+# If this program is built with stage1 or greater, we
 # need to depend on the libraries too.  NB. since $(ALL_STAGE1_LIBS) and
 # $(ALL_RTS_LIBS) are not defined until after libraries/*/ghc.mk have
 # been included, this introduces an ordering dependency.
diff --git a/rules/hs-suffix-way-rules.mk b/rules/hs-suffix-way-rules.mk
index da3d368c0bff10acf84b6246138f25e2ecdd432c..e75d7b458ed885d3a2734303b23ea421f7ac9ccb 100644
--- a/rules/hs-suffix-way-rules.mk
+++ b/rules/hs-suffix-way-rules.mk
@@ -30,7 +30,7 @@ $1/$2/build/%.$$(dyn_osuf)-boot: $1/$2/build/%.$$(v_hisuf)-boot
 else
 
 # Note [Implicit rule search algorithm]
-#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # The order in which implicit rules are defined can influence a build.
 #
 # Case study: genprimops/Lexer.hs
diff --git a/testsuite/driver/runtests.py b/testsuite/driver/runtests.py
index 00564a48dc1bdd2f2f55ae501104f23cd047329a..219fb41001a345b3bfa10039344987ad857c1cbb 100644
--- a/testsuite/driver/runtests.py
+++ b/testsuite/driver/runtests.py
@@ -601,7 +601,7 @@ else:
 cleanup_and_exit(exitcode)
 
 # Note [Running tests in /tmp]
-#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # Use LOCAL=0 to run tests in /tmp, to catch tests that use files from
 # the source directory without copying them to the test directory first.
 #
diff --git a/testsuite/driver/testlib.py b/testsuite/driver/testlib.py
index 6b6462f527b0a4eff895cc6460b0f4012518b025..423cd993136c30bcc5b582098c63ddf0e50821b8 100644
--- a/testsuite/driver/testlib.py
+++ b/testsuite/driver/testlib.py
@@ -992,6 +992,7 @@ def test(name: TestName,
             return
         else:
             # Note [Mutating config.only]
+            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~
             # config.only is initially the set of tests requested by
             # the user (via 'make TEST='). We then remove all tests that
             # we've already seen (in .T files), so that we can later
@@ -2006,7 +2007,7 @@ def write_file(f: Path, s: str) -> None:
         h.write(s)
 
 # Note [Universal newlines]
-#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
 # We don't want to write any Windows style line endings ever, because
 # it would mean that `make accept` would touch every line of the file
 # when switching between Linux and Windows.
@@ -2202,7 +2203,7 @@ def grep_output(normaliser: OutputNormalizer, pattern_file, actual_file, is_subs
     return success
 
 # Note [Output comparison]
-#
+# ~~~~~~~~~~~~~~~~~~~~~~~~
 # We do two types of output comparison:
 #
 # 1. To decide whether a test has failed. We apply a `normaliser` and an
@@ -2218,7 +2219,7 @@ def grep_output(normaliser: OutputNormalizer, pattern_file, actual_file, is_subs
 #    possible (#10152).
 
 # Note [Null device handling]
-#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # On windows the null device is 'nul' instead of '/dev/null'.
 # This can in principle be easily solved by using os.devnull.
 # Not doing so causes issues when python tries to read/write/open
diff --git a/testsuite/mk/boilerplate.mk b/testsuite/mk/boilerplate.mk
index 9b66ee1d1ea5407aefe2c2fef6638fbe80bd40a7..c3be669636dfcbd0030b63912864eae3b56e3945 100644
--- a/testsuite/mk/boilerplate.mk
+++ b/testsuite/mk/boilerplate.mk
@@ -50,7 +50,7 @@ endef
 ifeq "$(TEST_HC)" ""
 
 # Note [Spaces in TEST_HC]
-#
+# ~~~~~~~~~~~~~~~~~~~~~~~~
 # Tests should be able to handle paths with spaces.
 #
 # One of the things ./validate (without --fast) does is check if binary
@@ -128,7 +128,7 @@ endif
 IN_TREE_COMPILER = NO
 
 # Note [The TEST_HC variable]
-#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # As values of TEST_HC passed in by the user, we want to support:
 #  * both "ghc" and "/usr/bin/ghc"
 #      We use 'which' to convert the former to the latter.
diff --git a/testsuite/mk/test.mk b/testsuite/mk/test.mk
index 483d17c051d60d788476a177cfea7a5231cf82f7..e01a76eb29f1083a2605851f8fb8a21c107b54e6 100644
--- a/testsuite/mk/test.mk
+++ b/testsuite/mk/test.mk
@@ -384,7 +384,7 @@ list_broken:
 	$(MAKE) list_broken=YES
 
 # Note [Communicating options and variables to a submake]
-#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # Consider the following scenario:
 #   * A test foo is defined as
 #     test('foo', [], run_command, ['$MAKE footarget'])
diff --git a/testsuite/tests/perf/haddock/all.T b/testsuite/tests/perf/haddock/all.T
index 5e90fd05a3a5f194a62b03bca33e790332813643..e2f3346898cfa6ef68a37a0071c0ac5f446c11a4 100644
--- a/testsuite/tests/perf/haddock/all.T
+++ b/testsuite/tests/perf/haddock/all.T
@@ -1,5 +1,5 @@
 # Note [Haddock runtime stats files]
-#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # When one of the build systems builds a complete GHC distribution,
 # haddock gets built and then used to generate .haddock files for each
 # library. For that last step, both build systems pass an extra
diff --git a/testsuite/tests/stranal/sigs/T19871.hs b/testsuite/tests/stranal/sigs/T19871.hs
index 564a055df475a107c46f9417f99e9b2e4f14c2d7..ac9e81383664c7816475a77d93316ae7388ff660 100644
--- a/testsuite/tests/stranal/sigs/T19871.hs
+++ b/testsuite/tests/stranal/sigs/T19871.hs
@@ -1,6 +1,6 @@
 {-# OPTIONS_GHC -O2 -fforce-recomp #-}
 
--- | From Note [Boxity Analysis] and related Notes
+-- | From Note [Boxity analysis] and related Notes
 module T19871 where
 
 data Huge
diff --git a/utils/check-exact/ExactPrint.hs b/utils/check-exact/ExactPrint.hs
index 967ae6103544e9d6b9c66b0575132c357298f8aa..95a0348593485774ea396b732086d2e685d14cb8 100644
--- a/utils/check-exact/ExactPrint.hs
+++ b/utils/check-exact/ExactPrint.hs
@@ -868,7 +868,6 @@ exactDataFamInstDecl an top_lvl
 {-
 Note [an and an2 in exactDataFamInstDecl]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 The exactDataFamInstDecl function is called to render a
 DataFamInstDecl within its surrounding context. This context is
 rendered via the 'pp_hdr' function, which uses the exact print
diff --git a/utils/genapply/Main.hs b/utils/genapply/Main.hs
index a0209966db5b4c1fa8823c9a2a019682b9f00778..7166968ddd79f912f605b69e25cc58e5141a14ec 100644
--- a/utils/genapply/Main.hs
+++ b/utils/genapply/Main.hs
@@ -509,8 +509,8 @@ genMkPAP regstatus macro jump live ticker disamb
 
 
 -- Note [jump_SAVE_CCCS]
-
--- when profiling, if we have some extra arguments to apply that we
+-- ~~~~~~~~~~~~~~~~~~~~~
+-- When profiling, if we have some extra arguments to apply that we
 -- save to the stack, we must also save the current cost centre stack
 -- and restore it when applying the extra arguments.  This is all
 -- handled by the macro jump_SAVE_CCCS(target), defined in
diff --git a/utils/ghc-cabal/Main.hs b/utils/ghc-cabal/Main.hs
index e6df477238bd91ca4fabcd25c88b70f30cea624c..0514af148d98fdb99850913c35078f08040788d5 100644
--- a/utils/ghc-cabal/Main.hs
+++ b/utils/ghc-cabal/Main.hs
@@ -390,7 +390,8 @@ generate directory distdir config_args
           fixupRtsLibName x = x
           transitiveDepNames = map (display . packageName) transitive_dep_ids
 
-          -- Note [Msys2 path translation bug].
+          -- Note [Msys2 path translation bug]
+          -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           -- Msys2 has an annoying bug in their path conversion code.
           -- Officially anything starting with a drive letter should not be
           -- subjected to path translations, however it seems to only consider
diff --git a/utils/ghc-pkg/Main.hs b/utils/ghc-pkg/Main.hs
index bb6ee4d9faeae9d1717b8bf220143056af07fbc6..c977cb89c06e43c602938c6c5e95131013fc8f1e 100644
--- a/utils/ghc-pkg/Main.hs
+++ b/utils/ghc-pkg/Main.hs
@@ -1292,7 +1292,7 @@ updateDBCache verbosity db db_stack = do
       hasAnyAbiDepends x = length (abiDepends x) > 0
 
   -- warn when we find any (possibly-)bogus abi-depends fields;
-  -- Note [Recompute abi-depends]
+  -- See Note [Recompute abi-depends]
   when (verbosity >= Normal) $ do
     let definitelyBrokenPackages =
           nub
@@ -1341,7 +1341,6 @@ type PackageCacheFormat = GhcPkg.GenericUnitInfo
 
 {- Note [Recompute abi-depends]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 Like most fields, `ghc-pkg` relies on who-ever is performing package
 registration to fill in fields; this includes the `abi-depends` field present
 for the package.
diff --git a/utils/ghc-pkg/ghc.mk b/utils/ghc-pkg/ghc.mk
index e3740723d26e76eaa9f2253d1531d58032da695c..029d0b86e6e77908ab744849309676ea0d7c84fc 100644
--- a/utils/ghc-pkg/ghc.mk
+++ b/utils/ghc-pkg/ghc.mk
@@ -16,7 +16,7 @@
 utils/ghc-pkg_PACKAGE = ghc-pkg
 
 # Note [Why build certain utils twice?]
-#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # We build certain utils twice: once with stage0, and once with stage1.
 # Examples are ghc-pkg and hsc2hs.
 #
diff --git a/utils/llvm-targets/gen-data-layout.sh b/utils/llvm-targets/gen-data-layout.sh
index 982e178a47c36a5190bbd06a1734ff2348606a58..a5dfb0bf5a9d24bc0a1688e20f671f0a59130e98 100755
--- a/utils/llvm-targets/gen-data-layout.sh
+++ b/utils/llvm-targets/gen-data-layout.sh
@@ -16,7 +16,7 @@
 # Add missing targets to the list below to have them included in
 # llvm-targets file.
 #
-# See Note [LLVM Configuration] in SysTools for the whole story regarding LLVM
+# See Note [LLVM Configuration] in GHC.SysTools for the whole story regarding LLVM
 # configuration data.
 
 # Target sets for which to generate the llvm-targets file
diff --git a/validate b/validate
index 0f0f5814cbecea0c98711139cd0c269163be1bf8..2ecf37117f1d072ffea9c4069bcadb1482efb49d 100755
--- a/validate
+++ b/validate
@@ -251,8 +251,8 @@ if [ $build_only -eq 1 ] ||
         echo "ValidateSpeed=$speed" >> mk/are-validating.mk
         echo "ValidateHpc=$hpc"     >> mk/are-validating.mk
 
-        # Note [Default build system verbosity].
-        #
+        # Note [Default build system verbosity]
+        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         # From https://gitlab.haskell.org/ghc/ghc/wikis/design/build-system:
         #
         #   "The build system should clearly report what it's doing (and sometimes