{- | Response-bound guards for the proxy's data plane.

Écluse parses whatever an upstream returns. This module provides the pure guard layer
that prevents those steps from exhausting resources due to hostile or oversized input.

__How much an upstream may cost:__ A 'Limits' budget plus 'boundedRead' (abort a
streamed body past 'maxBodyBytes') and 'checkVersionCount' \/ 'checkNestingDepth'
(reject an oversized or deeply-nested parsed document) bound algorithmic-complexity
DoS from a hostile or compromised upstream. Every limit __fails closed__: exceeding
one yields 'Left', never a truncated or partial result.
-}
module Ecluse.Core.Security.Limits (
    -- * Response bounds
    Limits (..),
    defaultLimits,
    LimitError (..),
    boundedRead,
    checkVersionCount,
    checkNestingDepth,
    withinNestingBudget,
) where

import Data.Aeson (Value (Array, Bool, Null, Number, Object, String))
import Data.Aeson.KeyMap qualified as KeyMap
import Data.ByteString qualified as BS
import Data.ByteString.Builder (byteString, toLazyByteString)
import Data.ByteString.Lazy qualified as BSL
import Data.Map.Strict qualified as Map
import Data.Vector qualified as V

import Ecluse.Core.Package (PackageInfo, infoVersions)

{- | Resource budget for a single upstream response. Every field is a hard
ceiling enforced fail-closed: exceeding one aborts with a 'LimitError' rather
than returning a truncated or partially-parsed result. These bound the
algorithmic-complexity DoS a hostile or compromised upstream can inflict by
returning a huge or pathological document.

The metadata ceilings are layered. 'maxBodyBytes' (through 'boundedRead') is the
__primary, pre-decode__ bound: it caps the parse spend before aeson runs, so a
hostile body is aborted while still streaming and never reaches the decoder whole.
The post-projection 'maxVersionCount' ('checkVersionCount') is a __deliberate
defence-in-depth__ semantic backstop /behind/ it -- it refuses an over-versioned
packument after projection, bounding per-version work the byte cap already keeps
finite.
-}
data Limits = Limits
    { Limits -> Int
maxBodyBytes :: Int
    {- ^ Largest response body, in bytes, 'boundedRead' will accumulate before
    aborting. Bounds memory on the metadata path (artifacts are streamed, not
    buffered).
    -}
    , Limits -> Int
maxVersionCount :: Int
    {- ^ Largest number of versions a parsed packument may carry
    ('checkVersionCount'); bounds per-version rule evaluation.
    -}
    , Limits -> Int
maxNestingDepth :: Int
    {- ^ Deepest JSON nesting a decoded document may reach ('checkNestingDepth');
    bounds stack\/CPU on pathologically nested input.
    -}
    }
    deriving stock (Limits -> Limits -> Bool
(Limits -> Limits -> Bool)
-> (Limits -> Limits -> Bool) -> Eq Limits
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Limits -> Limits -> Bool
== :: Limits -> Limits -> Bool
$c/= :: Limits -> Limits -> Bool
/= :: Limits -> Limits -> Bool
Eq, Int -> Limits -> ShowS
[Limits] -> ShowS
Limits -> String
(Int -> Limits -> ShowS)
-> (Limits -> String) -> ([Limits] -> ShowS) -> Show Limits
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Limits -> ShowS
showsPrec :: Int -> Limits -> ShowS
$cshow :: Limits -> String
show :: Limits -> String
$cshowList :: [Limits] -> ShowS
showList :: [Limits] -> ShowS
Show)

{- | Sane defaults for 'Limits'. Generous enough for real registry documents and
tight enough to fail closed on pathological input: a 12 MiB metadata body, 100k
versions, and 64 levels of JSON nesting. Override per deployment as needed.
-}
defaultLimits :: Limits
defaultLimits :: Limits
defaultLimits =
    Limits
        { maxBodyBytes :: Int
maxBodyBytes = Int
12 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
1024 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
1024
        , maxVersionCount :: Int
maxVersionCount = Int
100_000
        , maxNestingDepth :: Int
maxNestingDepth = Int
64
        }

-- | Which 'Limits' ceiling a response exceeded.
data LimitError
    = -- | The body exceeded 'maxBodyBytes'; carries the configured ceiling.
      BodyTooLarge Int
    | {- | The packument carried more than 'maxVersionCount' versions; carries the
      count seen and the ceiling.
      -}
      TooManyVersions Int Int
    | -- | JSON nesting exceeded 'maxNestingDepth'; carries the ceiling.
      TooDeeplyNested Int
    deriving stock (LimitError -> LimitError -> Bool
(LimitError -> LimitError -> Bool)
-> (LimitError -> LimitError -> Bool) -> Eq LimitError
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: LimitError -> LimitError -> Bool
== :: LimitError -> LimitError -> Bool
$c/= :: LimitError -> LimitError -> Bool
/= :: LimitError -> LimitError -> Bool
Eq, Int -> LimitError -> ShowS
[LimitError] -> ShowS
LimitError -> String
(Int -> LimitError -> ShowS)
-> (LimitError -> String)
-> ([LimitError] -> ShowS)
-> Show LimitError
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> LimitError -> ShowS
showsPrec :: Int -> LimitError -> ShowS
$cshow :: LimitError -> String
show :: LimitError -> String
$cshowList :: [LimitError] -> ShowS
showList :: [LimitError] -> ShowS
Show)

{- | Read a streamed body chunk-by-chunk, aborting as soon as the accumulated
size would exceed 'maxBodyBytes'. Polymorphic over the producing monad so the
streaming fetch can run it in 'IO' while tests drive it purely.

@readChunk@ is a chunk producer following the @http-client@ @BodyReader@ contract:
each call yields the next chunk, and an __empty__ 'ByteString' signals end of
input. 'boundedRead' pulls chunks until EOF and returns the concatenated body, or
stops at the first chunk that pushes the running total past 'maxBodyBytes' and
returns @'Left' ('BodyTooLarge' …)@ -- __fail-closed__, never a truncated body. A
zero or negative 'maxBodyBytes' rejects any non-empty body. The bound is checked
__before__ a chunk is retained, so memory never exceeds the limit plus one chunk.
-}
boundedRead :: (Monad m) => Limits -> m ByteString -> m (Either LimitError ByteString)
boundedRead :: forall (m :: * -> *).
Monad m =>
Limits -> m ByteString -> m (Either LimitError ByteString)
boundedRead Limits
limits m ByteString
readChunk = Int -> Builder -> m (Either LimitError ByteString)
go Int
0 Builder
forall a. Monoid a => a
mempty
  where
    cap :: Int
cap = Limits -> Int
maxBodyBytes Limits
limits
    -- Accumulate the body in a forward-built 'Builder' (chunks appended in arrival
    -- order), finalised once at EOF -- no reversed chunk list to undo.
    go :: Int -> Builder -> m (Either LimitError ByteString)
go !Int
seen Builder
acc = do
        chunk <- m ByteString
readChunk
        if BS.null chunk
            then pure (Right (BSL.toStrict (toLazyByteString acc)))
            else
                let seen' = Int
seen Int -> Int -> Int
forall a. Num a => a -> a -> a
+ ByteString -> Int
BS.length ByteString
chunk
                 in if seen' > cap
                        then pure (Left (BodyTooLarge cap))
                        else go seen' (acc <> byteString chunk)

{- | Reject a parsed packument carrying more than 'maxVersionCount' versions,
returning it unchanged when within budget.

Applied after a document is projected to 'Ecluse.Core.Package.PackageInfo' but before
per-version rule evaluation, so the cost of evaluating rules over every version is
bounded by configuration rather than by what an upstream returns. Counts the
'Ecluse.Core.Package.infoVersions' map; on breach returns @'Left' ('TooManyVersions'
count cap)@, otherwise the document unchanged so it threads through a parse
pipeline.
-}
checkVersionCount :: Limits -> PackageInfo -> Either LimitError PackageInfo
checkVersionCount :: Limits -> PackageInfo -> Either LimitError PackageInfo
checkVersionCount Limits
limits PackageInfo
info =
    if Int
count Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
cap
        then LimitError -> Either LimitError PackageInfo
forall a b. a -> Either a b
Left (Int -> Int -> LimitError
TooManyVersions Int
count Int
cap)
        else PackageInfo -> Either LimitError PackageInfo
forall a b. b -> Either a b
Right PackageInfo
info
  where
    cap :: Int
cap = Limits -> Int
maxVersionCount Limits
limits
    count :: Int
count = Map Text PackageDetails -> Int
forall k a. Map k a -> Int
Map.size (PackageInfo -> Map Text PackageDetails
infoVersions PackageInfo
info)

{- | Reject a decoded JSON document nested deeper than 'maxNestingDepth',
returning it unchanged when within budget.

Run on the __already-decoded__ 'Value' -- after the parser has produced it, before
the document is projected to domain types -- so a pathologically nested payload is
refused before any deep /domain/ traversal. It is therefore __not__ the defence
against an unbounded structure: the structure is already /bounded-by-body-size/ by
the time it reaches here, since the @maxBodyBytes@ cap on the streamed read precedes
the decode (a body the parser never finishes reading never produces a 'Value'). This
guard bounds the __traversal cost__ of a within-size-but-deeply-nested document -- the
stack\/CPU a recursive walk of it would spend -- which the body cap alone does not
bound (a small body can still nest deeply). Depth counts container nesting: a scalar
is depth @1@, and each enclosing 'Object'\/'Array' adds one. An empty container
counts as a leaf (depth @1@), since it forces no descent. Traversal short-circuits at
the first sub-tree to breach the ceiling, so a deeply-nested branch costs no more than
the ceiling to reject.
-}
checkNestingDepth :: Limits -> Value -> Either LimitError Value
checkNestingDepth :: Limits -> Value -> Either LimitError Value
checkNestingDepth Limits
limits Value
value =
    if Int -> Value -> Bool
withinNestingBudget (Limits -> Int
maxNestingDepth Limits
limits) Value
value
        then Value -> Either LimitError Value
forall a b. b -> Either a b
Right Value
value
        else LimitError -> Either LimitError Value
forall a b. a -> Either a b
Left (Int -> LimitError
TooDeeplyNested (Limits -> Int
maxNestingDepth Limits
limits))

{- | True iff @value@ nests no deeper than @budget@ levels -- the depth predicate
'checkNestingDepth' decides against 'maxNestingDepth', exposed so a /selective/ decode
that never materialises the whole 'Value' (see
"Ecluse.Core.Registry.Npm.SelectiveDecode") can bound each sub-tree it walks at the same
budget and so reproduce 'checkNestingDepth' over the document exactly.

Depth counts container nesting: a scalar is depth @1@, an empty container is a leaf
(depth @1@, it forces no descent), and each enclosing 'Object'\/'Array' adds one.
Decrements per nested container and fails fast at zero, so a huge sub-tree is not fully
walked.
-}
withinNestingBudget :: Int -> Value -> Bool
withinNestingBudget :: Int -> Value -> Bool
withinNestingBudget Int
budget Value
v =
    Int
budget Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
1 Bool -> Bool -> Bool
&& case Value
v of
        Object Object
o -> (Value -> Bool) -> [Value] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Int -> Value -> Bool
withinNestingBudget (Int
budget Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)) (Object -> [Value]
forall v. KeyMap v -> [v]
KeyMap.elems Object
o)
        Array Array
xs -> (Value -> Bool) -> Array -> Bool
forall a. (a -> Bool) -> Vector a -> Bool
V.all (Int -> Value -> Bool
withinNestingBudget (Int
budget Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)) Array
xs
        String Text
_ -> Bool
True
        Number Scientific
_ -> Bool
True
        Bool Bool
_ -> Bool
True
        Value
Null -> Bool
True