| commit 5efbf0d243984444cf352ad6f0d147e226c64498 |
| Author: Sergei Trofimovich <slyfox@gentoo.org> |
| Date: Thu Sep 1 17:34:58 2016 +0100 |
| |
| restore -fmax-worker-args handling (Trac #11565) |
| |
| maxWorkerArgs handling was accidentally lost 3 years ago |
| in a major update of demand analysis |
| commit 0831a12ea2fc73c33652eeec1adc79fa19700578 |
| |
| Old regression is noticeable as: |
| - code bloat (requires stack reshuffling) |
| - compilation slowdown (more code to optimise/generate) |
| - and increased heap usage (DynFlags unboxing/reboxing?) |
| |
| On a simple compile benchmark this change causes heap |
| allocation drop from 70G don to 67G (ghc perf build). |
| |
| Signed-off-by: Sergei Trofimovich <siarheit@google.com> |
| |
| Reviewers: simonpj, ezyang, goldfire, austin, bgamari |
| |
| Reviewed By: simonpj, ezyang |
| |
| Subscribers: thomie |
| |
| Differential Revision: https://phabricator.haskell.org/D2503 |
| |
| GHC Trac Issues: #11565 |
| |
| diff --git a/compiler/specialise/SpecConstr.hs b/compiler/specialise/SpecConstr.hs |
| index 10d5614..7166f57 100644 |
| --- a/compiler/specialise/SpecConstr.hs |
| +++ b/compiler/specialise/SpecConstr.hs |
| @@ -29,7 +29,7 @@ import CoreFVs ( exprsFreeVarsList ) |
| import CoreMonad |
| import Literal ( litIsLifted ) |
| import HscTypes ( ModGuts(..) ) |
| -import WwLib ( mkWorkerArgs ) |
| +import WwLib ( isWorkerSmallEnough, mkWorkerArgs ) |
| import DataCon |
| import Coercion hiding( substCo ) |
| import Rules |
| @@ -1533,10 +1533,14 @@ specialise env bind_calls (RI { ri_fn = fn, ri_lam_bndrs = arg_bndrs |
| |
| | Just all_calls <- lookupVarEnv bind_calls fn |
| = -- pprTrace "specialise entry {" (ppr fn <+> ppr (length all_calls)) $ |
| - do { (boring_call, pats) <- callsToPats env specs arg_occs all_calls |
| - |
| + do { (boring_call, all_pats) <- callsToPats env specs arg_occs all_calls |
| -- Bale out if too many specialisations |
| - ; let n_pats = length pats |
| + ; let pats = filter (is_small_enough . fst) all_pats |
| + is_small_enough vars = isWorkerSmallEnough (sc_dflags env) vars |
| + -- We are about to construct w/w pair in 'spec_one'. |
| + -- Omit specialisation leading to high arity workers. |
| + -- See Note [Limit w/w arity] |
| + n_pats = length pats |
| spec_count' = n_pats + spec_count |
| ; case sc_count env of |
| Just max | not (sc_force env) && spec_count' > max |
| diff --git a/compiler/stranal/WwLib.hs b/compiler/stranal/WwLib.hs |
| index 09bc204..d9460d9 100644 |
| --- a/compiler/stranal/WwLib.hs |
| +++ b/compiler/stranal/WwLib.hs |
| @@ -8,6 +8,7 @@ |
| |
| module WwLib ( mkWwBodies, mkWWstr, mkWorkerArgs |
| , deepSplitProductType_maybe, findTypeShape |
| + , isWorkerSmallEnough |
| ) where |
| |
| #include "HsVersions.h" |
| @@ -144,7 +145,8 @@ mkWwBodies dflags fam_envs fun_ty demands res_info one_shots |
| wrapper_body = wrap_fn_args . wrap_fn_cpr . wrap_fn_str . applyToVars work_call_args . Var |
| worker_body = mkLams work_lam_args. work_fn_str . work_fn_cpr . work_fn_args |
| |
| - ; if useful1 && not (only_one_void_argument) || useful2 |
| + ; if isWorkerSmallEnough dflags work_args |
| + && (useful1 && not only_one_void_argument || useful2) |
| then return (Just (worker_args_dmds, wrapper_body, worker_body)) |
| else return Nothing |
| } |
| @@ -165,6 +167,12 @@ mkWwBodies dflags fam_envs fun_ty demands res_info one_shots |
| | otherwise |
| = False |
| |
| +-- See Note [Limit w/w arity] |
| +isWorkerSmallEnough :: DynFlags -> [Var] -> Bool |
| +isWorkerSmallEnough dflags vars = count isId vars <= maxWorkerArgs dflags |
| + -- We count only Free variables (isId) to skip Type, Kind |
| + -- variables which have no runtime representation. |
| + |
| {- |
| Note [Always do CPR w/w] |
| ~~~~~~~~~~~~~~~~~~~~~~~~ |
| @@ -178,6 +186,30 @@ a disaster, because then the enclosing function might say it has the CPR |
| property, but now doesn't and there a cascade of disaster. A good example |
| is Trac #5920. |
| |
| +Note [Limit w/w arity] |
| +~~~~~~~~~~~~~~~~~~~~~~~~ |
| +Guard against high worker arity as it generates a lot of stack traffic. |
| +A simplified example is Trac #11565#comment:6 |
| + |
| +Current strategy is very simple: don't perform w/w transformation at all |
| +if the result produces a wrapper with arity higher than -fmax-worker-args=. |
| + |
| +It is a bit all or nothing, consider |
| + |
| + f (x,y) (a,b,c,d,e ... , z) = rhs |
| + |
| +Currently we will remove all w/w ness entirely. But actually we could |
| +w/w on the (x,y) pair... it's the huge product that is the problem. |
| + |
| +Could we instead refrain from w/w on an arg-by-arg basis? Yes, that'd |
| +solve f. But we can get a lot of args from deeply-nested products: |
| + |
| + g (a, (b, (c, (d, ...)))) = rhs |
| + |
| +This is harder to spot on an arg-by-arg basis. Previously mkWwStr was |
| +given some "fuel" saying how many arguments it could add; when we ran |
| +out of fuel it would stop w/wing. |
| +Still not very clever because it had a left-right bias. |
| |
| ************************************************************************ |
| * * |