{- | Response-bound guards for the proxy's data plane. Écluse parses whatever an upstream returns. This module provides the pure guard layer that prevents those steps from exhausting resources due to hostile or oversized input. __How much an upstream may cost:__ A 'Limits' budget plus 'boundedRead' (abort a streamed body past 'maxBodyBytes') and 'checkVersionCount' \/ 'checkNestingDepth' (reject an oversized or deeply-nested parsed document) bound algorithmic-complexity DoS from a hostile or compromised upstream. Every limit __fails closed__: exceeding one yields 'Left', never a truncated or partial result. -} module Ecluse.Core.Security.Limits ( -- * Response bounds Limits (..), defaultLimits, LimitError (..), boundedRead, checkVersionCount, checkNestingDepth, withinNestingBudget, ) where import Data.Aeson (Value (Array, Bool, Null, Number, Object, String)) import Data.Aeson.KeyMap qualified as KeyMap import Data.ByteString qualified as BS import Data.ByteString.Builder (byteString, toLazyByteString) import Data.ByteString.Lazy qualified as BSL import Data.Map.Strict qualified as Map import Data.Vector qualified as V import Ecluse.Core.Package (PackageInfo, infoVersions) {- | Resource budget for a single upstream response. Every field is a hard ceiling enforced fail-closed: exceeding one aborts with a 'LimitError' rather than returning a truncated or partially-parsed result. These bound the algorithmic-complexity DoS a hostile or compromised upstream can inflict by returning a huge or pathological document. The metadata ceilings are layered. 'maxBodyBytes' (through 'boundedRead') is the __primary, pre-decode__ bound: it caps the parse spend before aeson runs, so a hostile body is aborted while still streaming and never reaches the decoder whole. The post-projection 'maxVersionCount' ('checkVersionCount') is a __deliberate defence-in-depth__ semantic backstop /behind/ it -- it refuses an over-versioned packument after projection, bounding per-version work the byte cap already keeps finite. -} data Limits = Limits { Limits -> Int maxBodyBytes :: Int {- ^ Largest response body, in bytes, 'boundedRead' will accumulate before aborting. Bounds memory on the metadata path (artifacts are streamed, not buffered). -} , Limits -> Int maxVersionCount :: Int {- ^ Largest number of versions a parsed packument may carry ('checkVersionCount'); bounds per-version rule evaluation. -} , Limits -> Int maxNestingDepth :: Int {- ^ Deepest JSON nesting a decoded document may reach ('checkNestingDepth'); bounds stack\/CPU on pathologically nested input. -} } deriving stock (Limits -> Limits -> Bool (Limits -> Limits -> Bool) -> (Limits -> Limits -> Bool) -> Eq Limits forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a $c== :: Limits -> Limits -> Bool == :: Limits -> Limits -> Bool $c/= :: Limits -> Limits -> Bool /= :: Limits -> Limits -> Bool Eq, Int -> Limits -> ShowS [Limits] -> ShowS Limits -> String (Int -> Limits -> ShowS) -> (Limits -> String) -> ([Limits] -> ShowS) -> Show Limits forall a. (Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a $cshowsPrec :: Int -> Limits -> ShowS showsPrec :: Int -> Limits -> ShowS $cshow :: Limits -> String show :: Limits -> String $cshowList :: [Limits] -> ShowS showList :: [Limits] -> ShowS Show) {- | Sane defaults for 'Limits'. Generous enough for real registry documents and tight enough to fail closed on pathological input: a 12 MiB metadata body, 100k versions, and 64 levels of JSON nesting. Override per deployment as needed. -} defaultLimits :: Limits defaultLimits :: Limits defaultLimits = Limits { maxBodyBytes :: Int maxBodyBytes = Int 12 Int -> Int -> Int forall a. Num a => a -> a -> a * Int 1024 Int -> Int -> Int forall a. Num a => a -> a -> a * Int 1024 , maxVersionCount :: Int maxVersionCount = Int 100_000 , maxNestingDepth :: Int maxNestingDepth = Int 64 } -- | Which 'Limits' ceiling a response exceeded. data LimitError = -- | The body exceeded 'maxBodyBytes'; carries the configured ceiling. BodyTooLarge Int | {- | The packument carried more than 'maxVersionCount' versions; carries the count seen and the ceiling. -} TooManyVersions Int Int | -- | JSON nesting exceeded 'maxNestingDepth'; carries the ceiling. TooDeeplyNested Int deriving stock (LimitError -> LimitError -> Bool (LimitError -> LimitError -> Bool) -> (LimitError -> LimitError -> Bool) -> Eq LimitError forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a $c== :: LimitError -> LimitError -> Bool == :: LimitError -> LimitError -> Bool $c/= :: LimitError -> LimitError -> Bool /= :: LimitError -> LimitError -> Bool Eq, Int -> LimitError -> ShowS [LimitError] -> ShowS LimitError -> String (Int -> LimitError -> ShowS) -> (LimitError -> String) -> ([LimitError] -> ShowS) -> Show LimitError forall a. (Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a $cshowsPrec :: Int -> LimitError -> ShowS showsPrec :: Int -> LimitError -> ShowS $cshow :: LimitError -> String show :: LimitError -> String $cshowList :: [LimitError] -> ShowS showList :: [LimitError] -> ShowS Show) {- | Read a streamed body chunk-by-chunk, aborting as soon as the accumulated size would exceed 'maxBodyBytes'. Polymorphic over the producing monad so the streaming fetch can run it in 'IO' while tests drive it purely. @readChunk@ is a chunk producer following the @http-client@ @BodyReader@ contract: each call yields the next chunk, and an __empty__ 'ByteString' signals end of input. 'boundedRead' pulls chunks until EOF and returns the concatenated body, or stops at the first chunk that pushes the running total past 'maxBodyBytes' and returns @'Left' ('BodyTooLarge' …)@ -- __fail-closed__, never a truncated body. A zero or negative 'maxBodyBytes' rejects any non-empty body. The bound is checked __before__ a chunk is retained, so memory never exceeds the limit plus one chunk. -} boundedRead :: (Monad m) => Limits -> m ByteString -> m (Either LimitError ByteString) boundedRead :: forall (m :: * -> *). Monad m => Limits -> m ByteString -> m (Either LimitError ByteString) boundedRead Limits limits m ByteString readChunk = Int -> Builder -> m (Either LimitError ByteString) go Int 0 Builder forall a. Monoid a => a mempty where cap :: Int cap = Limits -> Int maxBodyBytes Limits limits -- Accumulate the body in a forward-built 'Builder' (chunks appended in arrival -- order), finalised once at EOF -- no reversed chunk list to undo. go :: Int -> Builder -> m (Either LimitError ByteString) go !Int seen Builder acc = do chunk <- m ByteString readChunk if BS.null chunk then pure (Right (BSL.toStrict (toLazyByteString acc))) else let seen' = Int seen Int -> Int -> Int forall a. Num a => a -> a -> a + ByteString -> Int BS.length ByteString chunk in if seen' > cap then pure (Left (BodyTooLarge cap)) else go seen' (acc <> byteString chunk) {- | Reject a parsed packument carrying more than 'maxVersionCount' versions, returning it unchanged when within budget. Applied after a document is projected to 'Ecluse.Core.Package.PackageInfo' but before per-version rule evaluation, so the cost of evaluating rules over every version is bounded by configuration rather than by what an upstream returns. Counts the 'Ecluse.Core.Package.infoVersions' map; on breach returns @'Left' ('TooManyVersions' count cap)@, otherwise the document unchanged so it threads through a parse pipeline. -} checkVersionCount :: Limits -> PackageInfo -> Either LimitError PackageInfo checkVersionCount :: Limits -> PackageInfo -> Either LimitError PackageInfo checkVersionCount Limits limits PackageInfo info = if Int count Int -> Int -> Bool forall a. Ord a => a -> a -> Bool > Int cap then LimitError -> Either LimitError PackageInfo forall a b. a -> Either a b Left (Int -> Int -> LimitError TooManyVersions Int count Int cap) else PackageInfo -> Either LimitError PackageInfo forall a b. b -> Either a b Right PackageInfo info where cap :: Int cap = Limits -> Int maxVersionCount Limits limits count :: Int count = Map Text PackageDetails -> Int forall k a. Map k a -> Int Map.size (PackageInfo -> Map Text PackageDetails infoVersions PackageInfo info) {- | Reject a decoded JSON document nested deeper than 'maxNestingDepth', returning it unchanged when within budget. Run on the __already-decoded__ 'Value' -- after the parser has produced it, before the document is projected to domain types -- so a pathologically nested payload is refused before any deep /domain/ traversal. It is therefore __not__ the defence against an unbounded structure: the structure is already /bounded-by-body-size/ by the time it reaches here, since the @maxBodyBytes@ cap on the streamed read precedes the decode (a body the parser never finishes reading never produces a 'Value'). This guard bounds the __traversal cost__ of a within-size-but-deeply-nested document -- the stack\/CPU a recursive walk of it would spend -- which the body cap alone does not bound (a small body can still nest deeply). Depth counts container nesting: a scalar is depth @1@, and each enclosing 'Object'\/'Array' adds one. An empty container counts as a leaf (depth @1@), since it forces no descent. Traversal short-circuits at the first sub-tree to breach the ceiling, so a deeply-nested branch costs no more than the ceiling to reject. -} checkNestingDepth :: Limits -> Value -> Either LimitError Value checkNestingDepth :: Limits -> Value -> Either LimitError Value checkNestingDepth Limits limits Value value = if Int -> Value -> Bool withinNestingBudget (Limits -> Int maxNestingDepth Limits limits) Value value then Value -> Either LimitError Value forall a b. b -> Either a b Right Value value else LimitError -> Either LimitError Value forall a b. a -> Either a b Left (Int -> LimitError TooDeeplyNested (Limits -> Int maxNestingDepth Limits limits)) {- | True iff @value@ nests no deeper than @budget@ levels -- the depth predicate 'checkNestingDepth' decides against 'maxNestingDepth', exposed so a /selective/ decode that never materialises the whole 'Value' (see "Ecluse.Core.Registry.Npm.SelectiveDecode") can bound each sub-tree it walks at the same budget and so reproduce 'checkNestingDepth' over the document exactly. Depth counts container nesting: a scalar is depth @1@, an empty container is a leaf (depth @1@, it forces no descent), and each enclosing 'Object'\/'Array' adds one. Decrements per nested container and fails fast at zero, so a huge sub-tree is not fully walked. -} withinNestingBudget :: Int -> Value -> Bool withinNestingBudget :: Int -> Value -> Bool withinNestingBudget Int budget Value v = Int budget Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >= Int 1 Bool -> Bool -> Bool && case Value v of Object Object o -> (Value -> Bool) -> [Value] -> Bool forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool all (Int -> Value -> Bool withinNestingBudget (Int budget Int -> Int -> Int forall a. Num a => a -> a -> a - Int 1)) (Object -> [Value] forall v. KeyMap v -> [v] KeyMap.elems Object o) Array Array xs -> (Value -> Bool) -> Array -> Bool forall a. (a -> Bool) -> Vector a -> Bool V.all (Int -> Value -> Bool withinNestingBudget (Int budget Int -> Int -> Int forall a. Num a => a -> a -> a - Int 1)) Array xs String Text _ -> Bool True Number Scientific _ -> Bool True Bool Bool _ -> Bool True Value Null -> Bool True