{- | Resolving and applying the process's runtime posture -- how many capabilities
Écluse claims and what heap ceiling it runs under -- from first-class configuration
with a cgroup-derived fallback, logged at boot with each decision's provenance.

The GHC RTS sizes itself from what the /machine/ looks like: bare @-N@ claims a
capability per visible processor, and the heap is unbounded unless @-M@ says
otherwise. In a container neither default matches the pod: a CPU __limit__ is a
cgroup quota that does not shrink the visible processor count, so the RTS claims a
whole node's worth of capabilities under a two-CPU quota, and the only memory
backstop is the kernel OOM killer. This module closes that gap the way Go's
@automaxprocs@ does, but config-first:

1. __Explicit configuration wins__: @cores@ (@ECLUSE_CORES@) and @maxHeapBytes@
   (@ECLUSE_MAX_HEAP_BYTES@).
2. __Omitted values fall back to the cgroup__ (v2): @cpu.max@'s quota, __floored__
   (at least one) and clamped to the visible processors, and @memory.max@ less the
   nursery budget and slack ('deriveMaxHeapBytes'). Flooring follows Go's
   @automaxprocs@: a capability count above the budget lets a stop-the-world
   collection outrun the CFS quota and freeze mid-pause, so a fractional
   entitlement is stranded rather than borrowed against.
3. __No limit found either way__: the posture the RTS already resolved (its baked
   defaults plus any @GHCRTS@ the operator set) stands, and the log says so.

Every decision is logged through the standard boot log with its provenance
('renderRuntimePosture'), so an operator reads what was decided or interpreted
straight from the start-up lines.

This resolution is __role-agnostic on purpose, and only the resolution__: cores and
the heap ceiling derive from the container's limits, which bind every role (proxy,
Pilot, Dredger) alike. Workload-shaped tuning -- the allocation area, sized for the
proxy's serve path -- is deliberately not modelled per role here; a role whose
profile diverges is tuned per-deployment via @GHCRTS@ until its shape earns a
default of its own.

== Applying the plan: 'setNumCapabilities', or one exec-in-place

A capability change is applied in-process ('GHC.Conc.setNumCapabilities'). The heap
ceiling has no in-process setter -- @-M@ is fixed when the RTS starts -- so when the
plan requires one, the boot __re-executes its own binary once__ with the resolved
flags appended to @GHCRTS@ (later flags win, verified against GHC 9.10). The exec
replaces the program image in the same process: the PID never exits, so a container
supervisor sees an uninterrupted process, exactly as an @exec@-ing entrypoint script
behaves. A marker variable ('reexecMarker') guards against loops: the re-launched
process sees it, skips any further exec, and only logs (a warning, if the RTS still
diverges from the plan -- an operator's @GHCRTS@ fighting the config, or a flag the
RTS rejected). A failure of the exec call itself is likewise degraded to a warning
and an unenforced posture: tuning never loops the boot and never takes the service
down.

The pure resolution ('resolveRuntimePlan'), the cgroup parsing ('parseCpuMax',
'parseMemoryMax'), and the rendering are separated from the thin IO shell
('applyRuntimePosture') so the precedence and arithmetic are unit-tested without a
cgroup in sight. Sizes are bytes everywhere here; the RTS flag fields count 4 KiB
blocks and are converted at the read boundary ('rtsBlockBytes').
-}
module Ecluse.Runtime (
    -- * Applying the resolved posture at boot
    applyRuntimePosture,

    -- * The pure resolution core
    RtsPosture (..),
    CgroupLimits (..),
    Provenance (..),
    RuntimePlan (..),
    resolveRuntimePlan,
    deriveMaxHeapBytes,
    requiredRtsFlags,
    renderRuntimePosture,

    -- * Cgroup v2 parsing
    parseCpuMax,
    parseMemoryMax,
    parseCgroupSelfPath,
    ancestorPaths,
) where

import Data.Text qualified as T
import GHC.Conc (getNumCapabilities, getNumProcessors, setNumCapabilities)
import GHC.RTS.Flags (GCFlags (maxHeapSize, minAllocAreaSize, nurseryChunkSize), getGCFlags)
import System.Environment (getEnvironment, getExecutablePath)
import System.IO.Error (isDoesNotExistError)
import System.Posix.Process (executeFile)
import UnliftIO (tryIO, tryJust)

{- | The RTS posture the process is actually running with, in bytes. Read once at
boot ('currentRtsPosture'); the plan is resolved against it and the log renders it.
-}
data RtsPosture = RtsPosture
    { RtsPosture -> Int
rpCapabilities :: Int
    -- ^ Capabilities claimed ('getNumCapabilities' at boot).
    , RtsPosture -> Int
rpProcessors :: Int
    -- ^ Processors the RTS can see -- the ceiling a derived capability count clamps to.
    , RtsPosture -> Int
rpAllocAreaBytes :: Int
    -- ^ The per-capability allocation area (@-A@), bytes.
    , RtsPosture -> Maybe Int
rpNurseryChunkBytes :: Maybe Int
    -- ^ The nursery chunk size (@-n@), bytes; 'Nothing' when unset.
    , RtsPosture -> Maybe Int
rpMaxHeapBytes :: Maybe Int
    -- ^ The heap ceiling (@-M@), bytes; 'Nothing' when unlimited.
    }
    deriving stock (RtsPosture -> RtsPosture -> Bool
(RtsPosture -> RtsPosture -> Bool)
-> (RtsPosture -> RtsPosture -> Bool) -> Eq RtsPosture
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: RtsPosture -> RtsPosture -> Bool
== :: RtsPosture -> RtsPosture -> Bool
$c/= :: RtsPosture -> RtsPosture -> Bool
/= :: RtsPosture -> RtsPosture -> Bool
Eq, Int -> RtsPosture -> ShowS
[RtsPosture] -> ShowS
RtsPosture -> String
(Int -> RtsPosture -> ShowS)
-> (RtsPosture -> String)
-> ([RtsPosture] -> ShowS)
-> Show RtsPosture
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> RtsPosture -> ShowS
showsPrec :: Int -> RtsPosture -> ShowS
$cshow :: RtsPosture -> String
show :: RtsPosture -> String
$cshowList :: [RtsPosture] -> ShowS
showList :: [RtsPosture] -> ShowS
Show)

{- | What the cgroup (v2) grants this process: the CPU quota in cores
(@cpu.max@, quota over period) and the memory ceiling in bytes (@memory.max@).
'Nothing' per axis when the file is absent (not a cgroup-v2 environment) or the
value is the unlimited @max@ sentinel.
-}
data CgroupLimits = CgroupLimits
    { CgroupLimits -> Maybe Double
cgCpuCores :: Maybe Double
    , CgroupLimits -> Maybe Int
cgMemoryMaxBytes :: Maybe Int
    }
    deriving stock (CgroupLimits -> CgroupLimits -> Bool
(CgroupLimits -> CgroupLimits -> Bool)
-> (CgroupLimits -> CgroupLimits -> Bool) -> Eq CgroupLimits
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: CgroupLimits -> CgroupLimits -> Bool
== :: CgroupLimits -> CgroupLimits -> Bool
$c/= :: CgroupLimits -> CgroupLimits -> Bool
/= :: CgroupLimits -> CgroupLimits -> Bool
Eq, Int -> CgroupLimits -> ShowS
[CgroupLimits] -> ShowS
CgroupLimits -> String
(Int -> CgroupLimits -> ShowS)
-> (CgroupLimits -> String)
-> ([CgroupLimits] -> ShowS)
-> Show CgroupLimits
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> CgroupLimits -> ShowS
showsPrec :: Int -> CgroupLimits -> ShowS
$cshow :: CgroupLimits -> String
show :: CgroupLimits -> String
$cshowList :: [CgroupLimits] -> ShowS
showList :: [CgroupLimits] -> ShowS
Show)

-- | Where a resolved value came from, for the boot log's provenance clause.
data Provenance
    = -- | Explicit Écluse configuration (@cores@ \/ @maxHeapBytes@).
      FromConfig
    | -- | Derived from the cgroup limits.
      FromCgroup
    | -- | Left as the RTS resolved it (baked defaults plus any operator @GHCRTS@).
      FromRts
    deriving stock (Provenance -> Provenance -> Bool
(Provenance -> Provenance -> Bool)
-> (Provenance -> Provenance -> Bool) -> Eq Provenance
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Provenance -> Provenance -> Bool
== :: Provenance -> Provenance -> Bool
$c/= :: Provenance -> Provenance -> Bool
/= :: Provenance -> Provenance -> Bool
Eq, Int -> Provenance -> ShowS
[Provenance] -> ShowS
Provenance -> String
(Int -> Provenance -> ShowS)
-> (Provenance -> String)
-> ([Provenance] -> ShowS)
-> Show Provenance
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Provenance -> ShowS
showsPrec :: Int -> Provenance -> ShowS
$cshow :: Provenance -> String
show :: Provenance -> String
$cshowList :: [Provenance] -> ShowS
showList :: [Provenance] -> ShowS
Show)

{- | The resolved runtime posture: the capability count to run with and the heap
ceiling to enforce, each with its provenance. A 'FromRts' entry means "leave it
alone": the plan never overrides a posture it has no better information than.
-}
data RuntimePlan = RuntimePlan
    { RuntimePlan -> (Int, Provenance)
planCapabilities :: (Int, Provenance)
    , RuntimePlan -> (Maybe Int, Provenance)
planMaxHeapBytes :: (Maybe Int, Provenance)
    }
    deriving stock (RuntimePlan -> RuntimePlan -> Bool
(RuntimePlan -> RuntimePlan -> Bool)
-> (RuntimePlan -> RuntimePlan -> Bool) -> Eq RuntimePlan
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: RuntimePlan -> RuntimePlan -> Bool
== :: RuntimePlan -> RuntimePlan -> Bool
$c/= :: RuntimePlan -> RuntimePlan -> Bool
/= :: RuntimePlan -> RuntimePlan -> Bool
Eq, Int -> RuntimePlan -> ShowS
[RuntimePlan] -> ShowS
RuntimePlan -> String
(Int -> RuntimePlan -> ShowS)
-> (RuntimePlan -> String)
-> ([RuntimePlan] -> ShowS)
-> Show RuntimePlan
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> RuntimePlan -> ShowS
showsPrec :: Int -> RuntimePlan -> ShowS
$cshow :: RuntimePlan -> String
show :: RuntimePlan -> String
$cshowList :: [RuntimePlan] -> ShowS
showList :: [RuntimePlan] -> ShowS
Show)

{- | Resolve the runtime plan from the three layers, strongest first: explicit
config, then the cgroup, then the live RTS posture.

Capabilities: an explicit @cores@ wins; else the cgroup CPU quota rounded __up__
(a 0.5-CPU pod still needs one capability) and clamped to the visible processors;
else the RTS's own count stands. Always at least 1.

Heap ceiling: an explicit @maxHeapBytes@ wins; else 'deriveMaxHeapBytes' over the
cgroup memory limit and the __planned__ capability count (the nursery the process
will actually run with); else the RTS posture stands -- notably, an operator's
@GHCRTS -M@ is never overridden by mere derivation, and an absent limit is left
absent rather than fabricated.
-}
resolveRuntimePlan :: Maybe Int -> Maybe Int -> CgroupLimits -> RtsPosture -> RuntimePlan
resolveRuntimePlan :: Maybe Int -> Maybe Int -> CgroupLimits -> RtsPosture -> RuntimePlan
resolveRuntimePlan Maybe Int
cfgCores Maybe Int
cfgMaxHeap CgroupLimits
cgroup RtsPosture
rts =
    RuntimePlan
        { planCapabilities :: (Int, Provenance)
planCapabilities = (Int, Provenance)
capabilities
        , planMaxHeapBytes :: (Maybe Int, Provenance)
planMaxHeapBytes = (Maybe Int, Provenance)
maxHeap
        }
  where
    capabilities :: (Int, Provenance)
capabilities = case (Maybe Int
cfgCores, CgroupLimits -> Maybe Double
cgCpuCores CgroupLimits
cgroup) of
        (Just Int
n, Maybe Double
_) -> (Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
1 Int
n, Provenance
FromConfig)
        -- Floored, as Go's automaxprocs does: claiming above the budget would let a
        -- stop-the-world collection (all capabilities at once) outrun the CFS quota
        -- and freeze mid-pause. The clamp's floor of one covers sub-1 quotas.
        (Maybe Int
Nothing, Just Double
quota) -> (Int -> Int
clamp (Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
floor Double
quota), Provenance
FromCgroup)
        (Maybe Int
Nothing, Maybe Double
Nothing) -> (RtsPosture -> Int
rpCapabilities RtsPosture
rts, Provenance
FromRts)

    clamp :: Int -> Int
clamp Int
n = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
1 (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (RtsPosture -> Int
rpProcessors RtsPosture
rts) Int
n)

    maxHeap :: (Maybe Int, Provenance)
maxHeap = case (Maybe Int
cfgMaxHeap, CgroupLimits -> Maybe Int
cgMemoryMaxBytes CgroupLimits
cgroup) of
        (Just Int
bytes, Maybe Int
_) -> (Int -> Maybe Int
forall a. a -> Maybe a
Just (Int -> Int
alignToBlock Int
bytes), Provenance
FromConfig)
        (Maybe Int
Nothing, Just Int
memMax) ->
            (Int -> Maybe Int
forall a. a -> Maybe a
Just (Int -> Int -> Int -> Int
deriveMaxHeapBytes Int
memMax ((Int, Provenance) -> Int
forall a b. (a, b) -> a
fst (Int, Provenance)
capabilities) (RtsPosture -> Int
rpAllocAreaBytes RtsPosture
rts)), Provenance
FromCgroup)
        (Maybe Int
Nothing, Maybe Int
Nothing) -> (RtsPosture -> Maybe Int
rpMaxHeapBytes RtsPosture
rts, Provenance
FromRts)

{- | The heap ceiling derived from a cgroup memory limit: the limit less the
nursery budget (capabilities x allocation area -- memory the process spends over
and above the heap) less 10% slack for stacks, buffers, and the RTS itself,
floored at half the limit so a nursery mis-sized for a tiny pod still yields a
sane ceiling rather than a vanishing (or negative) one.
-}
deriveMaxHeapBytes :: Int -> Int -> Int -> Int
deriveMaxHeapBytes :: Int -> Int -> Int -> Int
deriveMaxHeapBytes Int
memMax Int
capabilities Int
allocAreaBytes =
    Int -> Int
alignToBlock (Int -> Int -> Int
forall a. Ord a => a -> a -> a
max (Int
memMax Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
nursery Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
slack) (Int
memMax Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
2))
  where
    nursery :: Int
nursery = Int
capabilities Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
allocAreaBytes
    slack :: Int
slack = Int
memMax Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
10

{- A heap ceiling rounded down to the RTS's 4 KiB block granularity (and at least
one block): the RTS stores @-M@ in blocks, so a non-multiple value would read back
rounded and the plan would forever look unapplied after the re-exec. -}
alignToBlock :: Int -> Int
alignToBlock :: Int -> Int
alignToBlock Int
bytes = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
rtsBlockBytes (Int
bytes Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
bytes Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
rtsBlockBytes)

{- | The RTS flags the plan requires beyond the live posture, in @GHCRTS@ syntax:
a @-N@ when the capability count must change, a @-M@ when a ceiling must be
enforced that is not already in force. Empty when the process is already running
the plan. A 'FromRts' entry never contributes a flag (it /is/ the live posture).
-}
requiredRtsFlags :: RtsPosture -> RuntimePlan -> [Text]
requiredRtsFlags :: RtsPosture -> RuntimePlan -> [Text]
requiredRtsFlags RtsPosture
rts RuntimePlan
plan =
    [Maybe Text] -> [Text]
forall a. [Maybe a] -> [a]
catMaybes [Maybe Text
capsFlag, Maybe Text
heapFlag]
  where
    capsFlag :: Maybe Text
capsFlag = case RuntimePlan -> (Int, Provenance)
planCapabilities RuntimePlan
plan of
        (Int
_, Provenance
FromRts) -> Maybe Text
forall a. Maybe a
Nothing
        (Int
n, Provenance
_)
            | Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== RtsPosture -> Int
rpCapabilities RtsPosture
rts -> Maybe Text
forall a. Maybe a
Nothing
            | Bool
otherwise -> Text -> Maybe Text
forall a. a -> Maybe a
Just (Text
"-N" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Int -> Text
forall b a. (Show a, IsString b) => a -> b
show Int
n)

    heapFlag :: Maybe Text
heapFlag = case RuntimePlan -> (Maybe Int, Provenance)
planMaxHeapBytes RuntimePlan
plan of
        (Maybe Int
_, Provenance
FromRts) -> Maybe Text
forall a. Maybe a
Nothing
        (Maybe Int
Nothing, Provenance
_) -> Maybe Text
forall a. Maybe a
Nothing
        (Just Int
bytes, Provenance
_)
            | Int -> Maybe Int
forall a. a -> Maybe a
Just Int
bytes Maybe Int -> Maybe Int -> Bool
forall a. Eq a => a -> a -> Bool
== RtsPosture -> Maybe Int
rpMaxHeapBytes RtsPosture
rts -> Maybe Text
forall a. Maybe a
Nothing
            | Bool
otherwise -> Text -> Maybe Text
forall a. a -> Maybe a
Just (Text
"-M" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Int -> Text
forall b a. (Show a, IsString b) => a -> b
show Int
bytes)

{- | The boot log's posture lines, one decision per line with its provenance, plus
the allocation-area line (always RTS-sourced; it is deliberately not config-surfaced).
Rendered from the __plan__, so the lines describe what the process runs with after
the plan is applied.
-}
renderRuntimePosture :: RuntimePlan -> RtsPosture -> [Text]
renderRuntimePosture :: RuntimePlan -> RtsPosture -> [Text]
renderRuntimePosture RuntimePlan
plan RtsPosture
rts =
    [ Text
"runtime: capabilities " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Int -> Text
forall b a. (Show a, IsString b) => a -> b
show ((Int, Provenance) -> Int
forall a b. (a, b) -> a
fst (RuntimePlan -> (Int, Provenance)
planCapabilities RuntimePlan
plan)) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Provenance -> Text
renderProvenance ((Int, Provenance) -> Provenance
forall a b. (a, b) -> b
snd (RuntimePlan -> (Int, Provenance)
planCapabilities RuntimePlan
plan))
    , case RuntimePlan -> (Maybe Int, Provenance)
planMaxHeapBytes RuntimePlan
plan of
        (Just Int
bytes, Provenance
prov) -> Text
"runtime: max heap " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Int -> Text
renderMiB Int
bytes Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Provenance -> Text
renderProvenance Provenance
prov
        (Maybe Int
Nothing, Provenance
_) -> Text
"runtime: max heap unbounded (the container memory limit is the only backstop; set maxHeapBytes or -M for a graceful ceiling)"
    , Text
"runtime: allocation area "
        Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Int -> Text
renderMiB (RtsPosture -> Int
rpAllocAreaBytes RtsPosture
rts)
        Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"/capability"
        Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> (Int -> Text) -> Maybe Int -> Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Text
"" (\Int
c -> Text
", nursery chunks " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Int -> Text
renderMiB Int
c) (RtsPosture -> Maybe Int
rpNurseryChunkBytes RtsPosture
rts)
        Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" (RTS; tune with GHCRTS, see USAGE.md)"
    ]

renderProvenance :: Provenance -> Text
renderProvenance :: Provenance -> Text
renderProvenance = \case
    Provenance
FromConfig -> Text
" (from config)"
    Provenance
FromCgroup -> Text
" (derived from the cgroup limit)"
    Provenance
FromRts -> Text
" (as the RTS resolved it)"

-- A byte count in MiB: whole when exact, else to one decimal place.
renderMiB :: Int -> Text
renderMiB :: Int -> Text
renderMiB Int
bytes =
    let mib :: Double
mib = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
bytes Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
1024 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
1024) :: Double
     in if Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
round Double
mib :: Int) Double -> Double -> Bool
forall a. Eq a => a -> a -> Bool
== Double
mib
            then Int -> Text
forall b a. (Show a, IsString b) => a -> b
show (Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
round Double
mib :: Int) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" MiB"
            else String -> Text
forall a. ToText a => a -> Text
toText (Double -> String
showRounded Double
mib) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" MiB"

showRounded :: Double -> String
showRounded :: Double -> String
showRounded Double
x = Double -> String
forall b a. (Show a, IsString b) => a -> b
show (Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
round (Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
10) :: Int) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
10 :: Double)

{- | Parse a cgroup-v2 @cpu.max@ body: @\"<quota> <period>\"@ yields the granted
cores (quota over period); the @\"max ...\"@ sentinel (no quota) yields 'Nothing'.
A malformed body yields 'Nothing' -- no limit is inferred from noise.
-}
parseCpuMax :: Text -> Maybe Double
parseCpuMax :: Text -> Maybe Double
parseCpuMax Text
body = case Text -> [Text]
T.words (Text -> Text
T.strip Text
body) of
    [Text
quota, Text
period] -> do
        q <- String -> Maybe Double
forall a. Read a => String -> Maybe a
readMaybe (Text -> String
forall a. ToString a => a -> String
toString Text
quota) :: Maybe Double
        p <- readMaybe (toString period) :: Maybe Double
        guard (q > 0 && p > 0)
        pure (q / p)
    [Text]
_ -> Maybe Double
forall a. Maybe a
Nothing

{- | Parse a cgroup-v2 @memory.max@ body: a byte count, or the unlimited @max@
sentinel ('Nothing'). A malformed body yields 'Nothing'.
-}
parseMemoryMax :: Text -> Maybe Int
parseMemoryMax :: Text -> Maybe Int
parseMemoryMax Text
body = do
    n <- String -> Maybe Int
forall a. Read a => String -> Maybe a
readMaybe (Text -> String
forall a. ToString a => a -> String
toString (Text -> Text
T.strip Text
body)) :: Maybe Int
    guard (n > 0)
    pure n

{- | Resolve the runtime plan and apply it, first thing at boot.

Reads the live posture and the cgroup, resolves the plan against the given config
values, and then:

* plan already in force: log the posture lines and return;
* only the capability count differs: apply it in-process
  ('setNumCapabilities'), log, and return;
* a heap ceiling must be enforced: append the required flags to @GHCRTS@ and
  __exec this binary in place__ (same PID, same arguments), once, guarded by
  'reexecMarker'. The re-launched process resolves the same plan, finds it in
  force, and logs the posture lines as normal.

When the marker is already set and the posture /still/ diverges (an operator's
@GHCRTS@ contradicting the config, or a flag the RTS rejected), the divergence is
logged as a warning and the process continues with what the RTS gave it -- boot
never loops and never aborts over tuning.
-}
applyRuntimePosture :: (Text -> IO ()) -> (Text -> IO ()) -> Maybe Int -> Maybe Int -> IO ()
applyRuntimePosture :: (Text -> IO ())
-> (Text -> IO ()) -> Maybe Int -> Maybe Int -> IO ()
applyRuntimePosture Text -> IO ()
logInfo Text -> IO ()
logWarning Maybe Int
cfgCores Maybe Int
cfgMaxHeap = do
    rts <- IO RtsPosture
currentRtsPosture
    cgroup <- readCgroupLimits
    let plan = Maybe Int -> Maybe Int -> CgroupLimits -> RtsPosture -> RuntimePlan
resolveRuntimePlan Maybe Int
cfgCores Maybe Int
cfgMaxHeap CgroupLimits
cgroup RtsPosture
rts
        flags = RtsPosture -> RuntimePlan -> [Text]
requiredRtsFlags RtsPosture
rts RuntimePlan
plan
    alreadyApplied <- isJust <$> lookupEnv reexecMarker
    case flags of
        [] -> RuntimePlan -> RtsPosture -> IO ()
logPosture RuntimePlan
plan RtsPosture
rts
        [Text]
_ | Bool
alreadyApplied -> do
            (Text -> IO ()) -> [Text] -> IO ()
warnStillDivergent Text -> IO ()
logWarning [Text]
flags
            RuntimePlan -> RtsPosture -> IO ()
logPosture RuntimePlan
plan RtsPosture
rts
        [Text
capsOnly]
            | Text
"-N" Text -> Text -> Bool
`T.isPrefixOf` Text
capsOnly -> do
                Int -> IO ()
setNumCapabilities ((Int, Provenance) -> Int
forall a b. (a, b) -> a
fst (RuntimePlan -> (Int, Provenance)
planCapabilities RuntimePlan
plan))
                RuntimePlan -> RtsPosture -> IO ()
logPosture RuntimePlan
plan RtsPosture
rts{rpCapabilities = fst (planCapabilities plan)}
        [Text]
_ -> do
            (Text -> IO ()) -> (Text -> IO ()) -> [Text] -> IO ()
reexecOrWarn Text -> IO ()
logInfo Text -> IO ()
logWarning [Text]
flags
            RuntimePlan -> RtsPosture -> IO ()
logPosture RuntimePlan
plan RtsPosture
rts
  where
    logPosture :: RuntimePlan -> RtsPosture -> IO ()
logPosture RuntimePlan
plan RtsPosture
rts = (Text -> IO ()) -> [Text] -> IO ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
(a -> f b) -> t a -> f ()
traverse_ Text -> IO ()
logInfo (RuntimePlan -> RtsPosture -> [Text]
renderRuntimePosture RuntimePlan
plan RtsPosture
rts)

-- The already-re-launched process found its plan still unapplied: warn and
-- continue with the live posture.
warnStillDivergent :: (Text -> IO ()) -> [Text] -> IO ()
warnStillDivergent :: (Text -> IO ()) -> [Text] -> IO ()
warnStillDivergent Text -> IO ()
logWarning [Text]
flags =
    Text -> IO ()
logWarning
        ( Text
"runtime: the resolved plan still requires "
            Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> [Text] -> Text
T.intercalate Text
" " [Text]
flags
            Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" after re-launch; an operator GHCRTS may be overriding the configuration, or the RTS rejected a flag. Continuing with the live posture."
        )

{- Tuning must never take the service down: a failed exec (essentially
unreachable -- the path is /proc/self/exe -- but not guaranteed) is degraded to a
warning and an unenforced posture, never an abort. The exec itself never returns
on success, so reaching the continuation at all means it failed. -}
reexecOrWarn :: (Text -> IO ()) -> (Text -> IO ()) -> [Text] -> IO ()
reexecOrWarn :: (Text -> IO ()) -> (Text -> IO ()) -> [Text] -> IO ()
reexecOrWarn Text -> IO ()
logInfo Text -> IO ()
logWarning [Text]
flags =
    IO () -> IO (Either IOException ())
forall (m :: * -> *) a.
MonadUnliftIO m =>
m a -> m (Either IOException a)
tryIO ((Text -> IO ()) -> [Text] -> IO ()
reexecWith Text -> IO ()
logInfo [Text]
flags) IO (Either IOException ())
-> (Either IOException () -> IO ()) -> IO ()
forall a b. IO a -> (a -> IO b) -> IO b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
        Left IOException
err ->
            Text -> IO ()
logWarning
                ( Text
"runtime: re-launching to apply "
                    Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> [Text] -> Text
T.intercalate Text
" " [Text]
flags
                    Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" failed ("
                    Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> IOException -> Text
forall b a. (Show a, IsString b) => a -> b
show IOException
err
                    Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"); continuing with the live posture, unenforced."
                )
        Right () -> IO ()
forall (f :: * -> *). Applicative f => f ()
pass

-- The live RTS posture, converted from the flag fields' 4 KiB blocks to bytes.
currentRtsPosture :: IO RtsPosture
currentRtsPosture :: IO RtsPosture
currentRtsPosture = do
    capabilities <- IO Int
getNumCapabilities
    processors <- getNumProcessors
    gc <- getGCFlags
    let blocks a
n = a -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
rtsBlockBytes
    pure
        RtsPosture
            { rpCapabilities = capabilities
            , rpProcessors = processors
            , rpAllocAreaBytes = blocks (minAllocAreaSize gc)
            , rpNurseryChunkBytes = nonZero (blocks (nurseryChunkSize gc))
            , rpMaxHeapBytes = nonZero (blocks (maxHeapSize gc))
            }
  where
    nonZero :: a -> Maybe a
nonZero a
n = if a
n a -> a -> Bool
forall a. Ord a => a -> a -> Bool
<= a
0 then Maybe a
forall a. Maybe a
Nothing else a -> Maybe a
forall a. a -> Maybe a
Just a
n

-- The RTS flag fields ('minAllocAreaSize', 'nurseryChunkSize', 'maxHeapSize')
-- count blocks of this many bytes (verified against GHC 9.10: -A64m reads back as
-- 16384, -M500m as 128000).
rtsBlockBytes :: Int
rtsBlockBytes :: Int
rtsBlockBytes = Int
4096

{- The cgroup-v2 limits that bind this process: its own cgroup (resolved from
@\/proc\/self\/cgroup@) and every ancestor up to the mount root, each axis taking the
__tightest__ limit found along the walk. Inside a container with a private cgroup
namespace the process's cgroup /is/ the visible root, so the walk is one step; on a
host (or a pod whose limit sits on a parent slice) the leaf alone would miss the
binding limit, which is why the ancestors are consulted too. Absent files and the
@max@ sentinel read as no limit at that level; a host with no cgroup v2 mounted
yields no limits at all. -}
readCgroupLimits :: IO CgroupLimits
readCgroupLimits :: IO CgroupLimits
readCgroupLimits = do
    selfCgroup <- String -> IO (Maybe Text)
readIfExists String
"/proc/self/cgroup"
    let relative = Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
"/" (Maybe Text
selfCgroup Maybe Text -> (Text -> Maybe Text) -> Maybe Text
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Text -> Maybe Text
parseCgroupSelfPath)
        dirs = [String
cgroupRoot String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Text -> String
forall a. ToString a => a -> String
toString Text
suffix | Text
suffix <- Text -> [Text]
ancestorPaths Text
relative]
    cpus <- traverse (limitAt parseCpuMax "/cpu.max") dirs
    memories <- traverse (limitAt parseMemoryMax "/memory.max") dirs
    pure
        CgroupLimits
            { cgCpuCores = tightest cpus
            , cgMemoryMaxBytes = tightest memories
            }
  where
    cgroupRoot :: String
cgroupRoot = String
"/sys/fs/cgroup"

    limitAt :: (Text -> Maybe a) -> String -> FilePath -> IO (Maybe a)
    limitAt :: forall a. (Text -> Maybe a) -> String -> String -> IO (Maybe a)
limitAt Text -> Maybe a
parse String
file String
dir = (Maybe Text -> (Text -> Maybe a) -> Maybe a
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Text -> Maybe a
parse) (Maybe Text -> Maybe a) -> IO (Maybe Text) -> IO (Maybe a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> IO (Maybe Text)
readIfExists (String
dir String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
file)

    tightest :: (Ord a) => [Maybe a] -> Maybe a
    tightest :: forall a. Ord a => [Maybe a] -> Maybe a
tightest [Maybe a]
found = case [Maybe a] -> [a]
forall a. [Maybe a] -> [a]
catMaybes [Maybe a]
found of
        [] -> Maybe a
forall a. Maybe a
Nothing
        (a
x : [a]
xs) -> a -> Maybe a
forall a. a -> Maybe a
Just ((a -> a -> a) -> a -> [a] -> a
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' a -> a -> a
forall a. Ord a => a -> a -> a
min a
x [a]
xs)

    readIfExists :: FilePath -> IO (Maybe Text)
    readIfExists :: String -> IO (Maybe Text)
readIfExists String
path =
        Either () Text -> Maybe Text
forall l r. Either l r -> Maybe r
rightToMaybe (Either () Text -> Maybe Text)
-> IO (Either () Text) -> IO (Maybe Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (IOException -> Maybe ()) -> IO Text -> IO (Either () Text)
forall (m :: * -> *) e b a.
(MonadUnliftIO m, Exception e) =>
(e -> Maybe b) -> m a -> m (Either b a)
tryJust (Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Maybe ())
-> (IOException -> Bool) -> IOException -> Maybe ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IOException -> Bool
isDoesNotExistError) (ByteString -> Text
forall a b. ConvertUtf8 a b => b -> a
decodeUtf8 (ByteString -> Text) -> IO ByteString -> IO Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> IO ByteString
forall (m :: * -> *). MonadIO m => String -> m ByteString
readFileBS String
path)

{- | The process's cgroup-v2 path from a @\/proc\/self\/cgroup@ body: the @0::@
line's path (@"0::\/a\/b"@ yields @"\/a\/b"@). 'Nothing' when no v2 entry is
present (a pure cgroup-v1 host).
-}
parseCgroupSelfPath :: Text -> Maybe Text
parseCgroupSelfPath :: Text -> Maybe Text
parseCgroupSelfPath Text
body =
    [Text] -> Maybe Text
forall a. [a] -> Maybe a
listToMaybe ((Text -> Maybe Text) -> [Text] -> [Text]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (Text -> Text -> Maybe Text
T.stripPrefix Text
"0::") (Text -> [Text]
forall t. IsText t "lines" => t -> [t]
lines (Text -> Text
T.strip Text
body)))

{- | A cgroup path and its ancestors, leaf first, ending at the root (the empty
suffix): @"\/a\/b"@ yields @["\/a\/b", "\/a", ""]@; the root path @"\/"@
yields just @[""]@.
-}
ancestorPaths :: Text -> [Text]
ancestorPaths :: Text -> [Text]
ancestorPaths Text
path = case (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) (HasCallStack => Text -> Text -> [Text]
Text -> Text -> [Text]
T.splitOn Text
"/" (Text -> Text
T.strip Text
path)) of
    [] -> [Text
""]
    [Text]
segments ->
        [[Text] -> Text
T.concat [Text
"/" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
seg | Text
seg <- Int -> [Text] -> [Text]
forall a. Int -> [a] -> [a]
take Int
n [Text]
segments] | Int
n <- [[Text] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Text]
segments, [Text] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Text]
segments Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1 .. Int
1]] [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [Text
""]

{- The environment marker that makes the exec-in-place a one-shot: set on the
re-launched process's environment, checked before any further exec. Deliberately
__outside__ the @ECLUSE_@ prefix: everything under that prefix is claimed by the
configuration environment layer (and rejected when unknown), and this marker is
process plumbing, not configuration. -}
reexecMarker :: String
reexecMarker :: String
reexecMarker = String
"__ECLUSE_RUNTIME_RTS_APPLIED"

{- Exec this binary in place with the required flags appended to @GHCRTS@ (later
flags win over both the baked defaults and any earlier operator flags) and the
loop-guard marker set. Same executable path, same arguments, same PID -- the
process never exits, so a container supervisor sees one uninterrupted process. -}
reexecWith :: (Text -> IO ()) -> [Text] -> IO ()
reexecWith :: (Text -> IO ()) -> [Text] -> IO ()
reexecWith Text -> IO ()
logInfo [Text]
flags = do
    self <- IO String
getExecutablePath
    args <- getArgs
    env <- getEnvironment
    let prior = (String, String) -> String
forall a b. (a, b) -> b
snd ((String, String) -> String)
-> Maybe (String, String) -> Maybe String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ((String, String) -> Bool)
-> [(String, String)] -> Maybe (String, String)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find ((String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"GHCRTS") (String -> Bool)
-> ((String, String) -> String) -> (String, String) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String, String) -> String
forall a b. (a, b) -> a
fst) [(String, String)]
env
        appended = Text -> (String -> Text) -> Maybe String -> Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Text
newFlags (\String
p -> String -> Text
forall a. ToText a => a -> Text
toText String
p Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
newFlags) Maybe String
prior
        env' =
            ((String
"GHCRTS", Text -> String
forall a. ToString a => a -> String
toString Text
appended) (String, String) -> [(String, String)] -> [(String, String)]
forall a. a -> [a] -> [a]
:)
                ([(String, String)] -> [(String, String)])
-> ([(String, String)] -> [(String, String)])
-> [(String, String)]
-> [(String, String)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((String
reexecMarker, String
"1") (String, String) -> [(String, String)] -> [(String, String)]
forall a. a -> [a] -> [a]
:)
                ([(String, String)] -> [(String, String)])
-> ([(String, String)] -> [(String, String)])
-> [(String, String)]
-> [(String, String)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((String, String) -> Bool)
-> [(String, String)] -> [(String, String)]
forall a. (a -> Bool) -> [a] -> [a]
filter (\(String
k, String
_) -> String
k String -> String -> Bool
forall a. Eq a => a -> a -> Bool
/= String
"GHCRTS" Bool -> Bool -> Bool
&& String
k String -> String -> Bool
forall a. Eq a => a -> a -> Bool
/= String
reexecMarker)
                ([(String, String)] -> [(String, String)])
-> [(String, String)] -> [(String, String)]
forall a b. (a -> b) -> a -> b
$ [(String, String)]
env
    logInfo ("runtime: re-launching with GHCRTS " <> appended <> " to apply the resolved plan (same process, exec in place)")
    executeFile self False args (Just env')
  where
    newFlags :: Text
newFlags = Text -> [Text] -> Text
T.intercalate Text
" " [Text]
flags