diff options
Diffstat (limited to 'dev-lang/ghc/files/ghc-8.0.1-par-g0-on-A32.patch')
-rw-r--r-- | dev-lang/ghc/files/ghc-8.0.1-par-g0-on-A32.patch | 65 |
1 files changed, 0 insertions, 65 deletions
diff --git a/dev-lang/ghc/files/ghc-8.0.1-par-g0-on-A32.patch b/dev-lang/ghc/files/ghc-8.0.1-par-g0-on-A32.patch deleted file mode 100644 index b46e57301782..000000000000 --- a/dev-lang/ghc/files/ghc-8.0.1-par-g0-on-A32.patch +++ /dev/null @@ -1,65 +0,0 @@ -commit bdfc5375f219d6def81effda4e57cb56d01fc917 -Author: Sergei Trofimovich <slyfox@gentoo.org> -Date: Tue Aug 30 12:10:54 2016 +0100 - - rts: enable parallel GC scan of large (32M+) allocation area - - Parallel GC does not scan a large allocation area (-A) - effectively, as it does not do work stealing from the nursery - by default. - - That leads to a large imbalance when only one of the threads - overflows its allocation area: most of the GC threads finish - quickly (as there is not much to collect) and sit idle - waiting while a single GC thread finishes the scan of the single - allocation area for that thread. - - The patch enables work stealing (equivalent of -qb0) for an - allocation area of -A32M or higher. - - Tested on the highlighting-kate package from Trac #9221 - - On an 8-core machine the wall-clock time is around 5% - faster. On a 24-core VM the speedup is 20%. 
- - Signed-off-by: Sergei Trofimovich <siarheit@google.com> - - Test Plan: measured wall time and GC parallelism on highlighting-kate build - - Reviewers: austin, bgamari, erikd, simonmar - - Reviewed By: bgamari, simonmar - - Subscribers: thomie - - Differential Revision: https://phabricator.haskell.org/D2483 - - GHC Trac Issues: #9221 - -diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c -index fda33f0..7a719b9 100644 ---- a/rts/RtsFlags.c -+++ b/rts/RtsFlags.c -@@ -237,1 +237,1 @@ void initRtsFlagsDefaults(void) -- RtsFlags.ParFlags.parGcLoadBalancingGen = 1; -+ RtsFlags.ParFlags.parGcLoadBalancingGen = ~0u; /* auto, based on -A */ -@@ -1398,2 +1390,19 @@ static void normaliseRtsOpts (void) - } - -+#ifdef THREADED_RTS -+ if (RtsFlags.ParFlags.parGcLoadBalancingGen == ~0u) { -+ StgWord alloc_area_bytes -+ = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE; -+ -+ // If allocation area is larger than CPU cache -+ // we can finish scanning quicker doing work-stealing -+ // scan. Trac #9221 -+ // 32M looks big enough not to fit into L2 cache -+ // of popular modern CPUs. -+ if (alloc_area_bytes >= 32 * 1024 * 1024) { -+ RtsFlags.ParFlags.parGcLoadBalancingGen = 0; -+ } else { -+ RtsFlags.ParFlags.parGcLoadBalancingGen = 1; -+ } -+ } -+#endif |