{- | Outbound-request guards for the proxy's data plane: defending how an upstream URL is derived. Écluse builds outbound HTTP requests from two untrusted sources -- __client-supplied package identifiers__ (the request path) and __upstream-supplied artifact locations__ (a packument's @dist.tarball@). This module provides the pure guard layer that enforces safe URL construction. __How an upstream URL is derived:__ 'upstreamUrlFor' builds an artifact\/metadata URL from a configured base URL and an __already-parsed__ 'PackageName', never from raw client path segments, re-checking each name component with the router's own safety rule so traversal, encoded slashes, or an absolute URL cannot change the target. -} module Ecluse.Core.Security.Url ( -- * Identifier → URL safety upstreamUrlFor, UrlError (..), ) where import Data.Text qualified as T import Ecluse.Core.Package (PackageName, renderPackageName) import Ecluse.Core.Server.Route (encodeComponent, isSafeComponent) import Ecluse.Core.Text (joinUrlPath) -- | Why building an upstream URL from an identifier was refused. data UrlError = {- | A name component (scope or base name) is unsafe to interpolate -- see 'Ecluse.Core.Server.Route.isSafeComponent'. Carries the offending component. -} UnsafeComponent Text | -- | The configured base URL is empty, so no URL can be formed. EmptyBaseUrl deriving stock (UrlError -> UrlError -> Bool (UrlError -> UrlError -> Bool) -> (UrlError -> UrlError -> Bool) -> Eq UrlError forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a $c== :: UrlError -> UrlError -> Bool == :: UrlError -> UrlError -> Bool $c/= :: UrlError -> UrlError -> Bool /= :: UrlError -> UrlError -> Bool Eq, Int -> UrlError -> ShowS [UrlError] -> ShowS UrlError -> String (Int -> UrlError -> ShowS) -> (UrlError -> String) -> ([UrlError] -> ShowS) -> Show UrlError forall a. (Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a $cshowsPrec :: Int -> UrlError -> ShowS showsPrec :: Int -> UrlError -> ShowS $cshow :: UrlError -> String show :: UrlError -> String $cshowList :: [UrlError] -> ShowS showList :: [UrlError] -> ShowS Show) {- | Build an upstream URL for a package from a configured base URL and an __already-parsed__ 'PackageName'. This is the only sanctioned way to derive an upstream URL for a package: the target is @{baseUrl}\/{path}@, where @path@ is built from the package's structural components and @baseUrl@ is __configuration__, never a client-supplied path. The client never chooses the host or the path prefix -- only which (validated) package -- so @..\/@ traversal, an encoded slash, an absolute URL, or a CRLF in the original request cannot steer the fetch elsewhere (see the module header). The path is built with two complementary defences. First, although a 'PackageName' is normally produced by the router's already-safe parse, its smart constructor does no validation, so this __re-checks every structural component__ (scope and base name) with the router's own 'Ecluse.Core.Server.Route.isSafeComponent' -- a name carrying a @\'\/\'@, @\'\\\\\'@, control character, or a @"."@\/@".."@ component is refused with 'UnsafeComponent' rather than interpolated. Second, each accepted component is then __percent-encoded__ ('Ecluse.Core.Server.Route.encodeComponent') around the structural @\'\@\'@ sigil and @%2F@ scope separator this builder writes -- so a @\'%\'@, @\'?\'@, @\'#\'@, or other reserved byte the denylist accepts (notably a once-decoded @%2e%2e%2f@) cannot reach the upstream URL raw. A scoped @\@scope\/name@ therefore yields exactly one @%2F@ (the separator written here, not an encoding of a component), with no double-encoding. An empty @baseUrl@ is refused with 'EmptyBaseUrl'. A single trailing slash on @baseUrl@ is tolerated so the join never doubles it. -} upstreamUrlFor :: Text -> PackageName -> Either UrlError Text upstreamUrlFor :: Text -> PackageName -> Either UrlError Text upstreamUrlFor Text baseUrl PackageName name | Text -> Bool T.null Text baseUrl = UrlError -> Either UrlError Text forall a b. a -> Either a b Left UrlError EmptyBaseUrl | Bool otherwise = case [Text] -> Maybe Text firstUnsafe (NameParts -> [Text] componentsOf NameParts parts) of Just Text bad -> UrlError -> Either UrlError Text forall a b. a -> Either a b Left (Text -> UrlError UnsafeComponent Text bad) Maybe Text Nothing -> Text -> Either UrlError Text forall a b. b -> Either a b Right (Text -> Text -> Text joinUrlPath Text baseUrl (NameParts -> Text encodePath NameParts parts)) where parts :: NameParts parts = PackageName -> NameParts nameParts PackageName name firstUnsafe :: [Text] -> Maybe Text firstUnsafe :: [Text] -> Maybe Text firstUnsafe = (Text -> Bool) -> [Text] -> Maybe Text forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a find (Bool -> Bool not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool forall b c a. (b -> c) -> (a -> b) -> a -> c . Text -> Bool isSafeComponent) {- The structural decomposition of a package name for URL building: a scope with a base name, or a single component, recovered by splitting the rendered name on the @\'\/\'@ scope separator so a legitimate scoped name's own separator is not judged as unsafe content. One source of truth for both the safety re-check ('componentsOf') and the encoded path ('encodePath'), so the two cannot disagree about where the component boundaries are. A leading @\'\@\'@ with no @\'\/\'@ (or an empty base) is a single component (the @\@foo@ fallback). -} data NameParts = -- A scoped name: scope and base, each a component to check and encode. Scoped Text Text | -- An unscoped (or @\@@-leading, separator-free) name: one component. Single Text nameParts :: PackageName -> NameParts nameParts :: PackageName -> NameParts nameParts PackageName name = let rendered :: Text rendered = PackageName -> Text renderPackageName PackageName name in case Text -> Text -> Maybe Text T.stripPrefix Text "@" Text rendered of Just Text scopeAndBase -> let (Text scope, Text base) = HasCallStack => Text -> Text -> (Text, Text) Text -> Text -> (Text, Text) T.breakOn Text "/" Text scopeAndBase in if Text -> Bool T.null Text base then Text -> NameParts Single Text rendered else Text -> Text -> NameParts Scoped Text scope (Int -> Text -> Text T.drop Int 1 Text base) Maybe Text Nothing -> Text -> NameParts Single Text rendered -- The components each of which must independently pass 'isSafeComponent'. componentsOf :: NameParts -> [Text] componentsOf :: NameParts -> [Text] componentsOf = \case Scoped Text scope Text base -> [Text scope, Text base] Single Text c -> [Text c] {- The encoded on-the-wire path for the components: the @\'\@\'@ sigil and the @%2F@ scope separator are written here, around each percent-encoded component, so a legitimate scoped name carries exactly one @%2F@ and no component byte can alter the URL's shape. -} encodePath :: NameParts -> Text encodePath :: NameParts -> Text encodePath = \case Scoped Text scope Text base -> Text "@" Text -> Text -> Text forall a. Semigroup a => a -> a -> a <> Text -> Text encodeComponent Text scope Text -> Text -> Text forall a. Semigroup a => a -> a -> a <> Text "%2F" Text -> Text -> Text forall a. Semigroup a => a -> a -> a <> Text -> Text encodeComponent Text base Single Text c -> Text -> Text encodeComponent Text c