Significant compiler allocations regression on Windows due to `directory` submodule bump and AFPP
While bumping the `base` version for GHC 9.6 (!9496, merged), I found that
compiler allocations regressed rather catastrophically on Windows:
Metrics: compile_time/bytes allocated
-------------------------------------
Test Metric Baseline value New value Change
---------------------------------------------------------------------------------------
CoOpt_Read(normal) ghc/alloc 772,439,104 777,834,360 +0.7%
CoOpt_Singletons(normal) ghc/alloc 974,633,336 978,143,824 +0.4%
InstanceMatching(normal) ghc/alloc 5,033,958,848 5,037,913,288 +0.1%
InstanceMatching1(normal) ghc/alloc 27,826,389,128 27,829,658,264 +0.0%
LargeRecord(normal) ghc/alloc 3,017,228,168 3,033,282,280 +0.5%
ManyAlternatives(normal) ghc/alloc 716,856,944 720,243,424 +0.5%
ManyConstructors(normal) ghc/alloc 3,799,913,912 3,803,319,688 +0.1%
MultiComponentModules(normal) ghc/alloc 3,991,442,856 7,089,563,704 +77.6% BAD
MultiComponentModulesRecomp(normal) ghc/alloc 2,912,034,888 6,078,103,600 +108.7% BAD
MultiLayerModules(normal) ghc/alloc 4,094,434,712 5,406,364,632 +32.0% BAD
MultiLayerModulesRecomp(normal) ghc/alloc 1,259,636,416 1,769,318,680 +40.5% BAD
PmSeriesG(normal) ghc/alloc 52,478,952 55,748,808 +6.2%
PmSeriesS(normal) ghc/alloc 60,867,904 64,132,624 +5.4%
PmSeriesT(normal) ghc/alloc 83,432,512 86,692,168 +3.9%
PmSeriesV(normal) ghc/alloc 60,250,856 63,512,552 +5.4%
T10421(normal) ghc/alloc 123,738,416 131,944,800 +6.6% BAD
T10421a(normal) ghc/alloc 91,659,344 97,551,144 +6.4%
T10547(normal) ghc/alloc 30,238,520 32,516,080 +7.5% BAD
T10858(normal) ghc/alloc 130,663,496 134,999,872 +3.3%
T11195(normal) ghc/alloc 202,313,184 214,701,424 +6.1%
T11276(normal) ghc/alloc 102,287,288 106,918,312 +4.5%
T11303b(normal) ghc/alloc 47,030,984 50,086,432 +6.5%
T11374(normal) ghc/alloc 186,906,120 190,833,400 +2.1%
T11545(normal) ghc/alloc 91,453,216 94,814,168 +3.7%
T11822(normal) ghc/alloc 105,363,040 109,914,712 +4.3%
T12150(optasm) ghc/alloc 86,455,824 89,540,776 +3.6% BAD
T12227(normal) ghc/alloc 496,109,592 503,898,816 +1.6% BAD
T12234(optasm) ghc/alloc 62,903,384 65,610,992 +4.3% BAD
T12425(optasm) ghc/alloc 94,958,176 99,627,864 +4.9% BAD
T12545(normal) ghc/alloc 1,683,662,144 1,690,420,936 +0.4%
T12707(normal) ghc/alloc 896,876,184 902,490,216 +0.6%
T13035(normal) ghc/alloc 107,506,112 110,721,016 +3.0% BAD
T13056(optasm) ghc/alloc 320,123,560 325,368,856 +1.6%
T13253(normal) ghc/alloc 336,663,416 347,185,456 +3.1% BAD
T13253-spj(normal) ghc/alloc 132,394,472 135,383,216 +2.3% BAD
T13379(normal) ghc/alloc 333,064,928 335,758,736 +0.8%
T13386(normal) ghc/alloc 853,276,640 857,346,288 +0.5%
T13701(normal) ghc/alloc 3,756,135,408 5,602,629,576 +49.2% BAD
T13719(normal) ghc/alloc 6,122,897,504 6,674,428,928 +9.0% BAD
T14052(ghci) ghc/alloc 3,867,429,128 3,982,110,168 +3.0%
T14052Type(ghci) ghc/alloc 3,868,617,672 3,907,841,112 +1.0%
T14683(normal) ghc/alloc 2,835,515,960 2,844,181,936 +0.3%
T14766(normal) ghc/alloc 986,719,552 989,678,744 +0.3%
T15164(normal) ghc/alloc 1,294,531,864 1,297,704,400 +0.2%
T15304(normal) ghc/alloc 1,292,260,192 1,295,464,120 +0.2%
T15630(normal) ghc/alloc 164,238,120 167,473,264 +2.0%
T15703(normal) ghc/alloc 542,380,568 549,526,832 +1.3% BAD
T16190(normal) ghc/alloc 289,489,592 341,787,024 +18.1%
T16577(normal) ghc/alloc 7,548,784,224 7,555,819,784 +0.1%
T16875(normal) ghc/alloc 44,141,280 49,486,912 +12.1% BAD
T17096(normal) ghc/alloc 217,070,360 221,147,344 +1.9%
T17516(normal) ghc/alloc 1,763,363,552 1,771,808,920 +0.5%
T17836(normal) ghc/alloc 861,002,488 865,772,360 +0.6%
T17836b(normal) ghc/alloc 52,859,864 55,794,192 +5.6%
T17977(normal) ghc/alloc 47,666,392 51,313,768 +7.7%
T17977b(normal) ghc/alloc 45,294,016 48,639,736 +7.4%
T18140(normal) ghc/alloc 83,162,024 85,711,984 +3.1% BAD
T18223(normal) ghc/alloc 482,707,944 491,234,344 +1.8%
T18282(normal) ghc/alloc 143,968,208 148,498,496 +3.1% BAD
T18304(normal) ghc/alloc 83,567,792 87,171,504 +4.3% BAD
T18478(normal) ghc/alloc 504,387,744 512,940,048 +1.7%
T18698a(normal) ghc/alloc 204,063,736 210,971,344 +3.4% BAD
T18698b(normal) ghc/alloc 225,197,720 232,103,240 +3.1% BAD
T18923(normal) ghc/alloc 73,642,960 76,338,768 +3.7% BAD
T1969(normal) ghc/alloc 700,809,216 703,486,000 +0.4%
T19695(normal) ghc/alloc 1,502,796,504 1,517,183,376 +1.0%
T20049(normal) ghc/alloc 97,462,872 100,160,288 +2.8% BAD
T20261(normal) ghc/alloc 607,752,576 617,067,568 +1.5%
T21839c(normal) ghc/alloc 455,908,944 470,896,688 +3.3% BAD
T21839r(normal) ghc/alloc 468,861,592 485,916,112 +3.6% BAD
T3064(normal) ghc/alloc 188,707,352 192,103,328 +1.8%
T3294(normal) ghc/alloc 1,346,903,544 1,350,088,976 +0.2%
T4801(normal) ghc/alloc 299,622,744 302,764,360 +1.0%
T5030(normal) ghc/alloc 364,269,624 368,658,760 +1.2%
T5321FD(normal) ghc/alloc 246,302,384 249,556,560 +1.3%
T5321Fun(normal) ghc/alloc 275,424,144 278,713,912 +1.2%
T5631(normal) ghc/alloc 534,762,160 540,107,696 +1.0%
T5642(normal) ghc/alloc 468,054,144 472,306,952 +0.9%
T5837(normal) ghc/alloc 43,020,120 46,615,848 +8.4% BAD
T6048(optasm) ghc/alloc 108,054,016 111,421,840 +3.1% BAD
T783(normal) ghc/alloc 379,090,528 381,832,952 +0.7%
T8095(normal) ghc/alloc 3,296,676,048 3,302,890,008 +0.2%
T9020(optasm) ghc/alloc 242,483,456 246,282,768 +1.6%
T9198(normal) ghc/alloc 52,326,624 55,391,352 +5.9% BAD
T9233(normal) ghc/alloc 728,899,040 734,994,424 +0.8%
T9630(normal) ghc/alloc 1,037,776,392 1,049,771,016 +1.2%
T9675(optasm) ghc/alloc 454,375,472 456,746,528 +0.5%
T9872a(normal) ghc/alloc 1,771,269,432 1,773,953,272 +0.2%
T9872b(normal) ghc/alloc 2,069,302,272 2,071,987,824 +0.1%
T9872b_defer(normal) ghc/alloc 3,144,990,792 3,148,602,808 +0.1%
T9872c(normal) ghc/alloc 1,718,411,280 1,721,093,280 +0.2%
T9872d(normal) ghc/alloc 455,274,552 458,733,512 +0.8%
T9961(normal) ghc/alloc 337,018,672 340,617,472 +1.1% BAD
TcPlugin_RewritePerf(normal) ghc/alloc 2,308,610,096 2,421,562,640 +4.9% BAD
WWRec(normal) ghc/alloc 575,244,288 578,160,680 +0.5%
hard_hole_fits(normal) ghc/alloc 459,129,656 477,532,776 +4.0% BAD
hie002(normal) ghc/alloc 9,256,398,632 9,262,014,928 +0.1%
parsing001(normal) ghc/alloc 528,573,672 530,252,376 +0.3%
geo. mean +5.0%
minimum +0.0%
maximum +108.7%
After quite a bit of digging, I was able to trace the cause back to the update of the `directory` submodule.
My suspicion is that the regression is specifically due to the changes made under the Abstract FilePath
Proposal (AFPP) in ghc/packages/directory@78b3e596. In particular, it appears that we now round-trip between lists and
UTF-16-encoded `ByteArray`s quite often.
The changes in the ticky profile (using the extreme case of `MultiComponentModulesRecomp` as a benchmark)
seem to confirm that `directory` is the culprit:
| Change | alloc A | alloc B | name |
|--------------|-----------|----------|--------------------------------------------------------------------------|
| +2737600.0 | 0 | 2737600 | sat_sZZK (:GHC.Parser) |
| +924144.0 | 1389816 | 2313960 | System.Directory.Internal.Common.simplifyWindows (:<no module>) |
| +875680.0 | 0 | 875680 | $lgo1_gb5m (:GHC.Unit.Module.ModSummary) |
| +309040.0 | 1061568 | 1370608 | Foreign.Marshal.Alloc.$wallocaBytesAligned (:<no module>) |
| +308832.0 | 0 | 308832 | System.Directory.Internal.Windows.toExtendedLengthPath (:<no module>) |
| +307648.0 | 0 | 307648 | System.Directory.Internal.Windows.fromExtendedLengthPath (:<no module>) |
| +192320.0 | 614960 | 807280 | Data.IntSet.Internal.union (:<no module>) |
| +121800.0 | 0 | 121800 | sat_s551 (:GHC.Unit.Info) |
| +115672.0 | 1982928 | 2098600 | Data.IntSet.Internal.$winsertBM (:<no module>) |
| +66528.0 | 17228352 | 17294880 | Data.Map.Internal.balanceL (:<no module>) |
| +19520.0 | 161040 | 180560 | $wmakeNewModSummary (:GHC.Driver.Make) |
| +14640.0 | 0 | 14640 | sat_sgjd (:GHC.Parser.Header) |
| +11216.0 | 331032 | 342248 | Data.IntMap.Internal.restrictKeys (:<no module>) |
| +10640.0 | 0 | 10640 | sat_s2Su (:GHC.Unit.Parser) |
| +7376.0 | 190248 | 197624 | Data.IntMap.Strict.Internal.insertWithKey_$sinsertWithKey (:<no module>) |
| +5712.0 | 10066896 | 10072608 | go25 (:GHC.Unit.State) |
| +3696.0 | 7055664 | 7059360 | GHC.Unit.State.$sgo8_$s$sgo1 (:<no module>) |
| +2840.0 | 0 | 2840 | sat_smOo (:GHC.Core.Rules) |
...
Note that the above table doesn't capture the full extent of the damage: many functions are present only in the AFPP version and are therefore not shown (a limitation of the analysis that generated the table). Looking at the functions present only in the AFPP version, we see a great deal of added cost due to `directory`:
| alloc B | name |
|-----------|-----------------------------------------------------------------------------------------|
| 340581448 | System.OsPath.Windows.Internal.$wreadDriveLetter (:<no module>) |
| 232381920 | System.OsPath.Windows.Internal.$wreadDriveUNC (:<no module>) |
| 201530400 | System.OsPath.Windows.Internal.$wreadDriveShare (:<no module>) |
| 154400640 | System.OsString.Windows.$wunpack (:<no module>) |
| 50043968 | System.OsPath.Data.ByteString.Short.Word16.$wbreak (:<no module>) |
| 47893880 | vals (:System.OsPath.Data.ByteString.Short.Internal) |
| 32676240 | System.OsPath.Data.ByteString.Short.Internal.$s$wpeekArray (:<no module>) |
| 30249864 | System.OsPath.Data.ByteString.Short.Word16.$wsplitAt (:<no module>) |
| 25021984 | sat_s3vV (:System.OsPath.Data.ByteString.Short.Word16) |
| 24681216 | System.OsPath.Windows.Internal.splitDirectories_f (:<no module>) |
| 22124256 | sat_stQN (:GHC.Parser.Lexer) |
| 21866624 | System.OsPath.Encoding.Internal.charsToCWchars (:<no module>) |
| 19278912 | System.OsPath.Windows.Internal.$wjoinDrive (:<no module>) |
| 18510912 | go1 (:System.Directory.Internal.Common) |
| 16779280 | sat_spxp (:GHC.Unit.Finder) |
| 13512208 | sat_s5fG (:System.OsPath.Encoding.Internal) |
| 9969680 | sat_spww (:GHC.Unit.Finder) |
| 9874624 | System.OsPath.Data.ByteString.Short.Word16.$wdropWhileEnd (:<no module>) |
| 7732880 | System.OsPath.Windows.Internal.$waddSlash (:<no module>) |
| 6959592 | sat_s50Z (:System.OsPath.Windows.Internal) |
| 6947592 | ds (:System.OsPath.Windows.Internal) |
| 6945160 | System.Directory.Internal.Common.expandDots_go (:<no module>) |
| 5865024 | sat_s3uo (:System.OsPath.Data.ByteString.Short.Word16) |
| 5696960 | sat_spwv (:GHC.Unit.Finder) |
| 5246160 | sat_s3uV (:System.OsPath.Data.ByteString.Short.Word16) |
| 5163984 | sat_sn2G (:GHC.Unit.State) |
| 4937312 | sat_s3BM (:System.OsPath.Data.ByteString.Short.Word16) |
| 4627728 | ds1 (:System.OsPath.Windows.Internal) |
| 3866440 | sat_s50T (:System.OsPath.Windows.Internal) |
| 3691776 | System.Directory.Internal.Windows.$wfromExtendedLengthPath (:<no module>) |
| 3245088 | sat_saxc (:GHC.Unit.Database) |
| 3085152 | System.OsString.Internal.pack1 (:<no module>) |
| 3085152 | sat_s5kK (:System.OsPath.Windows.Internal) |
| 2778064 | System.OsPath.Windows.Internal.combine (:<no module>) |
| 2774232 | System.OsPath.Data.ByteString.Short.Internal.useAsCWString (:<no module>) |
| 2737600 | sat_sZZx (:GHC.Parser) |
| 2600720 | sat_sRVb (:GHC.Parser) |
| 2468656 | System.OsPath.Data.ByteString.Short.Word16.$wtake (:<no module>) |
| 2433792 | unpack (:GHC.Encoding.UTF8) |
| 2309160 | System.OsPath.Windows.Internal.splitDrive (:<no module>) |
| 1698576 | System.Win32.WindowsString.File.createFile (:<no module>) |
| 1546576 | sat_s4TZ (:System.OsPath.Windows.Internal) |
| 1546576 | sat_s50V (:System.OsPath.Windows.Internal) |
| 1235328 | sat_s3mp (:System.Win32.WindowsString.File) |
| 1163864 | $l$wgo3_ggkN (:GHC.Parser.Header) |
...