{- | Outbound-request guards for the proxy's data plane: defending where the proxy fetches.

Écluse builds outbound HTTP requests from two untrusted sources -- __client-supplied
package identifiers__ (the request path) and __upstream-supplied artifact
locations__ (a packument's @dist.tarball@). This module provides the pure guard layer
that keeps the proxy from being steered by hostile input.

__Where the proxy fetches:__ 'isAllowedUpstreamHost' restricts outbound fetches
to the configured upstream hosts, and 'isBlockedTarget' rejects internal address
ranges (cloud instance metadata, loopback, RFC1918) that the proxy's network
position can otherwise reach. Together they are the SSRF gate: a target must be
both on the allowlist /and/ not an internal address.
-}
module Ecluse.Core.Security.Host (
    -- * Outbound host allowlist
    LoweredHostSet,
    lowerCaseHosts,
    isAllowedUpstreamHost,

    -- * Internal-range block
    isBlockedTarget,
    isBlockedIP,
    parseIpLiteral,
    parseBlockedRange,
    hostAddress,
    splitHostPort,

    -- * Tarball-host policy
    TarballHostPolicy (..),
    Origin (..),
    tarballHostAllowed,
    TarballHostGate (..),
    tarballHostGate,

    -- * Internal for testing
    isHex,
    isDecimal,
) where

import Data.IP (
    IP (IPv4, IPv6),
    IPRange (IPv4Range, IPv6Range),
    fromIPv6b,
    isMatchedTo,
    toIPv4,
    toIPv6,
 )
import Data.Set qualified as Set
import Data.Text qualified as T

{- | A set of host strings normalised to lower case, the form the host guards
('isAllowedUpstreamHost' and 'isBlockedTarget') compare against.

The type is __opaque, and 'lowerCaseHosts' is its only constructor__: a value of
this type therefore carries the proof that every host in it is already
lower-cased, so the guards lower only the /incoming/ host and the case-insensitive
match cannot be bypassed by an un-normalised configuration set.
-}
newtype LoweredHostSet = LoweredHostSet (Set Text)
    deriving stock (LoweredHostSet -> LoweredHostSet -> Bool
(LoweredHostSet -> LoweredHostSet -> Bool)
-> (LoweredHostSet -> LoweredHostSet -> Bool) -> Eq LoweredHostSet
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: LoweredHostSet -> LoweredHostSet -> Bool
== :: LoweredHostSet -> LoweredHostSet -> Bool
$c/= :: LoweredHostSet -> LoweredHostSet -> Bool
/= :: LoweredHostSet -> LoweredHostSet -> Bool
Eq, Int -> LoweredHostSet -> ShowS
[LoweredHostSet] -> ShowS
LoweredHostSet -> String
(Int -> LoweredHostSet -> ShowS)
-> (LoweredHostSet -> String)
-> ([LoweredHostSet] -> ShowS)
-> Show LoweredHostSet
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> LoweredHostSet -> ShowS
showsPrec :: Int -> LoweredHostSet -> ShowS
$cshow :: LoweredHostSet -> String
show :: LoweredHostSet -> String
$cshowList :: [LoweredHostSet] -> ShowS
showList :: [LoweredHostSet] -> ShowS
Show)

{- | Normalise a set of configured host strings to the canonical key form the host
guards take, yielding a 'LoweredHostSet'.

A plain DNS name is folded to lower case (hostnames are case-insensitive), so the
guards match an incoming host against the configuration regardless of how either
was spelled. An entry that parses as an __IP literal__ is additionally rendered to its
single canonical literal (see 'canonicalHostKey'), so equivalent spellings of one
address (compressed versus expanded IPv6, differing case) collapse to one key. An
operator who opts in @0:0:0:0:0:0:0:1@ therefore matches a literal @::1@ rather than
missing it on a textual difference.
-}
lowerCaseHosts :: Set Text -> LoweredHostSet
lowerCaseHosts :: Set Text -> LoweredHostSet
lowerCaseHosts = Set Text -> LoweredHostSet
LoweredHostSet (Set Text -> LoweredHostSet)
-> (Set Text -> Set Text) -> Set Text -> LoweredHostSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Text) -> Set Text -> Set Text
forall b a. Ord b => (a -> b) -> Set a -> Set b
Set.map Text -> Text
canonicalHostKey

{- | Whether @host@ is one of the configured upstream hosts.

The first guard on every outbound fetch: the proxy talks to its configured
private\/public upstreams and mirror target, and __nothing else__ -- so a target
host derived from a packument's @dist.tarball@ (or anywhere else) is fetched only
if it appears in @allowed@. The match is exact on the bare host (no port, no
scheme -- extract it with 'hostAddress' first) and __case-insensitive__, since
DNS hostnames are; an empty @host@ is never allowed. This is the allowlist half
of the SSRF gate; pair it with 'isBlockedTarget' for the internal-range half.

The allowlist is a 'LoweredHostSet', so it is already normalised and only the
incoming @host@ is folded here -- through the same 'canonicalHostKey' the set was
built with, so an IP-literal entry matches regardless of how either side spells the
address.
-}
isAllowedUpstreamHost :: LoweredHostSet -> Text -> Bool
isAllowedUpstreamHost :: LoweredHostSet -> Text -> Bool
isAllowedUpstreamHost (LoweredHostSet Set Text
allowed) Text
host =
    Bool -> Bool
not (Text -> Bool
T.null Text
host) Bool -> Bool -> Bool
&& Text -> Text
canonicalHostKey Text
host Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` Set Text
allowed

{- | Whether @host@ is an internal address the proxy must not fetch.

A proxy sits in a privileged network position, so an attacker who can steer a
fetch (see the module header) aims it at addresses only the proxy can reach: the
cloud instance-metadata endpoint (@169.254.169.254@), loopback, or the private
network (RFC1918). This blocks, by parsing @host@ as a literal IP and testing it
against:

* __link-local__ @169.254.0.0\/16@ (which contains the @169.254.169.254@ metadata
  address) and IPv6 @fe80::\/10@;
* __loopback__ @127.0.0.0\/8@ and IPv6 @::1@;
* __unspecified \/ this-host__ @0.0.0.0\/8@ and IPv6 @::@ -- @0.0.0.0@ is not a
  no-op target: on Linux a connect to it reaches a loopback-bound service, so it
  is a loopback-equivalent that must be blocked alongside @127.0.0.0\/8@;
* __RFC1918 private__ @10.0.0.0\/8@, @172.16.0.0\/12@, and @192.168.0.0\/16@;
* __CGNAT shared__ @100.64.0.0\/10@ (RFC 6598) -- carrier-grade NAT space some
  cloud fabrics route internally;
* __IPv6 unique-local__ @fc00::\/7@ (RFC 4193) -- the private-network IPv6 analogue,
  which contains the AWS IMDSv6 metadata endpoint @fd00:ec2::254@;
* every range in @additionalRanges@, the operator-configured extension of this
  fixed set (@ECLUSE_ADDITIONAL_BLOCKED_RANGES@) -- a deployment's own internal
  space this module cannot know about in advance.

A @host@ that is not an IP literal (a DNS name) is __not__ blocked here:
name-based targets are constrained by the 'isAllowedUpstreamHost' allowlist
instead, and post-resolution IP filtering belongs to the resolving fetch layer,
not this pure check. Both guards apply -- an allowlisted host that resolves to an
internal literal is still caught when its address is tested here.
-}
isBlockedTarget :: [IPRange] -> Text -> Bool
isBlockedTarget :: [IPRange] -> Text -> Bool
isBlockedTarget [IPRange]
additionalRanges Text
host =
    Bool -> (IpAddr -> Bool) -> Maybe IpAddr -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False ([IPRange] -> IP -> Bool
isBlockedIP [IPRange]
additionalRanges (IP -> Bool) -> (IpAddr -> IP) -> IpAddr -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IpAddr -> IP
ipAddrToIP) (Text -> Maybe IpAddr
parseIpLiteral Text
host)

{- | Whether an 'IP' falls in a blocked internal range: the fixed 'blockedRanges'
set together with the caller-supplied @additionalRanges@.

The single source of record for the internal-range decision, used by the literal
block ('isBlockedTarget') on the @dist.tarball@ host gate. An
IPv4-mapped IPv6 address (@::ffff:a.b.c.d@) is first decoded to its embedded IPv4
and tested against the IPv4 ranges: a mapped internal literal (e.g.
@::ffff:169.254.169.254@) is a recognised SSRF smuggling form, so it must be
caught by the IPv4 block rather than slip through as an unrelated IPv6 address.
-}
isBlockedIP :: [IPRange] -> IP -> Bool
isBlockedIP :: [IPRange] -> IP -> Bool
isBlockedIP [IPRange]
additionalRanges IP
ip = (IPRange -> Bool) -> [IPRange] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any IPRange -> Bool
matches ([IPRange]
blockedRanges [IPRange] -> [IPRange] -> [IPRange]
forall a. Semigroup a => a -> a -> a
<> [IPRange]
additionalRanges)
  where
    decoded :: IP
decoded = IP -> IP
decodeMappedV4 IP
ip
    matches :: IPRange -> Bool
matches = \case
        IPv4Range AddrRange IPv4
r -> case IP
decoded of
            IPv4 IPv4
a -> IPv4
a IPv4 -> AddrRange IPv4 -> Bool
forall a. Addr a => a -> AddrRange a -> Bool
`isMatchedTo` AddrRange IPv4
r
            IPv6 IPv6
_ -> Bool
False
        IPv6Range AddrRange IPv6
r -> case IP
decoded of
            IPv6 IPv6
a -> IPv6
a IPv6 -> AddrRange IPv6 -> Bool
forall a. Addr a => a -> AddrRange a -> Bool
`isMatchedTo` AddrRange IPv6
r
            IPv4 IPv4
_ -> Bool
False

{- The internal ranges the proxy refuses to fetch from, as @iproute@ CIDR values:
the unspecified \/ this-host, loopback, link-local, RFC1918, CGNAT-shared, and
IPv6 unique-local blocks. Declared once and consulted by 'isBlockedIP' alone, so
the blocked set is a single cross-cutting invariant. @0.0.0.0\/8@ is blocked
because @0.0.0.0@ reaches a loopback-bound service on Linux; @169.254.0.0\/16@
contains the @169.254.169.254@ cloud-metadata endpoint; @fc00::\/7@ contains the
AWS IMDSv6 endpoint @fd00:ec2::254@. An operator cannot narrow this fixed set --
only extend it, via the @additionalRanges@ 'isBlockedIP' also consults.
-}
blockedRanges :: [IPRange]
blockedRanges :: [IPRange]
blockedRanges =
    [ IPRange
"0.0.0.0/8" -- unspecified / this-host (reaches loopback on Linux)
    , IPRange
"10.0.0.0/8" -- RFC1918 private
    , IPRange
"100.64.0.0/10" -- CGNAT shared (RFC 6598)
    , IPRange
"127.0.0.0/8" -- loopback
    , IPRange
"169.254.0.0/16" -- link-local (incl. 169.254.169.254 metadata)
    , IPRange
"172.16.0.0/12" -- RFC1918 private
    , IPRange
"192.168.0.0/16" -- RFC1918 private
    , IPRange
"::/128" -- IPv6 unspecified
    , IPRange
"::1/128" -- IPv6 loopback
    , IPRange
"fe80::/10" -- IPv6 link-local
    , IPRange
"fc00::/7" -- IPv6 unique-local (incl. AWS IMDSv6 fd00:ec2::254)
    ]

{- | Parse one operator-configured @ECLUSE_ADDITIONAL_BLOCKED_RANGES@ entry (a
single CIDR, e.g. @"203.0.113.0\/24"@ or @"2001:db8::\/32"@) into an 'IPRange', or
'Nothing' for anything malformed.

A __total__ wrapper over @iproute@'s own 'Read' instance for 'IPRange': that
instance's underlying parser (@parseIPRange@) already fails by returning no
parse rather than calling 'error', so 'readMaybe' over it is safe -- unlike the
partial 'IsString' instance ('blockedRanges' relies on for its own compile-time
literals, where a malformed literal would be a build-time error, never runtime
input). This is the only way the config decoder is meant to turn operator text
into an 'IPRange': a malformed entry must fail closed at boot, never be silently
dropped or accepted as an unblocked range.
-}
parseBlockedRange :: Text -> Maybe IPRange
parseBlockedRange :: Text -> Maybe IPRange
parseBlockedRange = String -> Maybe IPRange
forall a. Read a => String -> Maybe a
readMaybe (String -> Maybe IPRange)
-> (Text -> String) -> Text -> Maybe IPRange
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
forall a. ToString a => a -> String
toString

{- Convert a recognised literal to an @iproute@ 'IP' for the membership test.
The four IPv4 octets become an 'IPv4', and the eight 16-bit groups an 'IPv6'. The
IPv4-mapped decode is left to 'isBlockedIP' ('decodeMappedV4'), so a mapped
literal is carried here as the IPv6 it textually is and decoded only at the point
of the range test.
-}
ipAddrToIP :: IpAddr -> IP
ipAddrToIP :: IpAddr -> IP
ipAddrToIP = \case
    IpV4 Word8
a Word8
b Word8
c Word8
d -> IPv4 -> IP
IPv4 ([Int] -> IPv4
toIPv4 ((Word8 -> Int) -> [Word8] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral [Word8
a, Word8
b, Word8
c, Word8
d]))
    IpV6 [Word16]
groups -> IPv6 -> IP
IPv6 ([Int] -> IPv6
toIPv6 ((Word16 -> Int) -> [Word16] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map Word16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral [Word16]
groups))

{- The canonical comparison key for a host: a normalised string the host guards
match a 'LoweredHostSet' on. A host that parses as an IP literal is rendered to the
@iproute@ canonical literal through @IP@ 'show', so equivalent spellings of one
address collapse to one key: compressed versus expanded IPv6
(@::1@ is @0:0:0:0:0:0:0:1@), embedded IPv4, and hex case all canonicalise identically.
Anything that is not a literal (a DNS name) is merely case-folded, since hostnames are
case-insensitive.

This is the single canonicaliser feeding the host allowlist: 'lowerCaseHosts' builds
the configured set with it, and 'isAllowedUpstreamHost' folds the queried host with
it, so a configured entry matches a literal address whichever representation either
uses. Pointing both sides at one @show@ is what guarantees they render identically;
a second, separate canonicaliser could drift.
-}
canonicalHostKey :: Text -> Text
canonicalHostKey :: Text -> Text
canonicalHostKey Text
host = case Text -> Maybe IpAddr
parseIpLiteral Text
host of
    Just IpAddr
addr -> IP -> Text
forall b a. (Show a, IsString b) => a -> b
show (IpAddr -> IP
ipAddrToIP IpAddr
addr)
    Maybe IpAddr
Nothing -> Text -> Text
T.toLower Text
host

{- Decode an IPv4-mapped IPv6 address (@::ffff:a.b.c.d@) to its embedded IPv4, so
it is tested against the IPv4 ranges; any other address is returned unchanged.
Over the sixteen octets 'fromIPv6b' yields, the mapped form is ten zero octets,
then @ff ff@, then the four IPv4 octets. Testing a mapped internal literal against
the IPv6 ranges instead would let @::ffff:169.254.169.254@ through, so the decode
is load-bearing for the SSRF block.
-}
decodeMappedV4 :: IP -> IP
decodeMappedV4 :: IP -> IP
decodeMappedV4 = \case
    IPv6 IPv6
v6 -> case IPv6 -> [Int]
fromIPv6b IPv6
v6 of
        [Int
0, Int
0, Int
0, Int
0, Int
0, Int
0, Int
0, Int
0, Int
0, Int
0, Int
0xFF, Int
0xFF, Int
a, Int
b, Int
c, Int
d] ->
            IPv4 -> IP
IPv4 ([Int] -> IPv4
toIPv4 [Int
a, Int
b, Int
c, Int
d])
        [Int]
_ -> IPv6 -> IP
IPv6 IPv6
v6
    IP
ip -> IP
ip

{- An IP literal, parsed from a host for internal-range testing. Internal to
this module; converted to an @iproute@ 'IP' by 'ipAddrToIP' for the membership
test, so it carries no instances.
-}
data IpAddr
    = -- An IPv4 address as its four octets.
      IpV4 Word8 Word8 Word8 Word8
    | -- An IPv6 address, normalised to its eight 16-bit groups.
      IpV6 [Word16]

{- | Extract the bare host from a URI or @host[:port]@ authority.

A convenience for the SSRF gate: an outbound target is usually a full URL or an
authority, but 'isAllowedUpstreamHost' and 'isBlockedTarget' compare the bare
host. This strips a @scheme:\/\/@ prefix, any @userinfo\@@, any @:port@ suffix,
and any @\/path@\/@?query@\/@#fragment@ tail, lower-casing the result. It is a
pragmatic extractor for comparison, __not__ a full RFC 3986 parser; a value with
no recognisable host yields the empty string, which both guards treat as
not-allowed. IPv6 literals in brackets (@[::1]:443@) are returned without the
brackets -- the bracket-aware @host[:port]@ split is 'splitHostPort', shared with
the SQS endpoint parser so the two cannot drift on an authority edge case; a
malformed authority (an opening bracket with no close) yields the empty string,
the same fail-safe the guards apply to it.
-}
hostAddress :: Text -> Text
hostAddress :: Text -> Text
hostAddress Text
raw =
    let afterScheme :: Text
afterScheme = Text -> Text -> Text
afterLast Text
"://" Text
raw
        authority :: Text
authority = (Char -> Bool) -> Text -> Text
T.takeWhile (Char -> String -> Bool
forall (f :: * -> *) a.
(Foldable f, DisallowElem f, Eq a) =>
a -> f a -> Bool
`notElem` [Char
'/', Char
'?', Char
'#']) Text
afterScheme
        afterUserinfo :: Text
afterUserinfo = Text -> Text -> Text
afterLast Text
"@" Text
authority
     in Text -> Text
T.toLower (Text -> ((Text, Text) -> Text) -> Maybe (Text, Text) -> Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Text
"" (Text, Text) -> Text
forall a b. (a, b) -> a
fst (Text -> Maybe (Text, Text)
splitHostPort Text
afterUserinfo))
  where
    -- The text after @needle@'s last occurrence, or all of @hay@ if absent.
    -- ('T.breakOnEnd' yields @(hay, "")@ when the needle is absent -- its prefix
    -- is non-empty exactly when the needle was found, since it includes it.)
    afterLast :: Text -> Text -> Text
    afterLast :: Text -> Text -> Text
afterLast Text
needle Text
hay =
        let (Text
pre, Text
post) = HasCallStack => Text -> Text -> (Text, Text)
Text -> Text -> (Text, Text)
T.breakOnEnd Text
needle Text
hay
         in if Text -> Bool
T.null Text
pre then Text
hay else Text
post

{- | Split a @host[:port]@ authority into its bare host and the raw @":port"@
remainder (empty when no port is present), bracket-aware so an IPv6 literal's
inner colons are never mistaken for the port separator.

The single canonical authority split feeding both the data-plane host extractor
('hostAddress') and the SQS endpoint parser ('Ecluse.Composition.parseEndpointUrl'),
so the two re-implementations the @[::1]:port@ edge cases tripped on cannot drift
again. A @[…]@ IPv6 literal is split on its closing bracket -- the host is returned
without the brackets and the remainder is whatever follows (a @":port"@ or empty) --
so an inner @::@ is never read as the port separator; a bare authority is split on
its first @':'@. An opening bracket with __no__ close is a malformed authority and
yields 'Nothing', which 'hostAddress' folds to the empty (not-allowed) host and the
endpoint parser surfaces as a malformed-URL boot error.
-}
splitHostPort :: Text -> Maybe (Text, Text)
splitHostPort :: Text -> Maybe (Text, Text)
splitHostPort Text
authority
    | Text -> Bool
T.null Text
authority = Maybe (Text, Text)
forall a. Maybe a
Nothing
    | Bool
otherwise = case Text -> Text -> Maybe Text
T.stripPrefix Text
"[" Text
authority of
        Just Text
rest -> case HasCallStack => Text -> Text -> (Text, Text)
Text -> Text -> (Text, Text)
T.breakOn Text
"]" Text
rest of
            (Text
_, Text
"") -> Maybe (Text, Text)
forall a. Maybe a
Nothing -- an opening bracket with no close: malformed
            (Text
inner, Text
afterBracket) -> (Text, Text) -> Maybe (Text, Text)
forall a. a -> Maybe a
Just (Text
inner, Int -> Text -> Text
T.drop Int
1 Text
afterBracket)
        Maybe Text
Nothing -> case HasCallStack => Text -> Text -> (Text, Text)
Text -> Text -> (Text, Text)
T.breakOn Text
":" Text
authority of
            (Text
"", Text
_) -> Maybe (Text, Text)
forall a. Maybe a
Nothing
            (Text
h, Text
"") -> (Text, Text) -> Maybe (Text, Text)
forall a. a -> Maybe a
Just (Text
h, Text
"")
            (Text
h, Text
p) -> if Text
p Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
":" then (Text, Text) -> Maybe (Text, Text)
forall a. a -> Maybe a
Just (Text
h, Text
"") else (Text, Text) -> Maybe (Text, Text)
forall a. a -> Maybe a
Just (Text
h, Text
p)

{- | Parse a host as an IP literal, or 'Nothing' for a DNS name. Handles dotted-
quad IPv4 and the IPv6 forms a host realistically carries -- full eight-group form,
@::@-compressed forms (including @::1@), and a trailing embedded IPv4 (the
@a.b.c.d@ in @::ffff:a.b.c.d@) -- which is enough to recognise the loopback,
link-local, and IPv4-mapped addresses 'isBlockedIP' blocks. It is deliberately
__not__ a complete IPv6 parser (no zone ids); an unrecognised literal is treated
as a name, which the host allowlist still constrains.

Only range __membership__ is delegated to @iproute@ ('isBlockedIP'); recognising
the literal stays hand-rolled __on purpose__. This recogniser is deliberately
__lenient__ on the IPv4 dotted-quad: it accepts the ambiguous octet spellings a
strict IP library rejects and coerces each octet exactly as @inet_aton@ -- and
hence a libc resolver -- does, so the block tests the address that would actually be
dialled. A @0x@\/@0X@-prefixed octet is hexadecimal, a leading-zero octet is
__octal__, and anything else is decimal. A leading-zero octet is therefore /not/
its decimal digits: @0012.0.0.1@ is octal @10.0.0.1@ (RFC1918, blocked), whereas
@010.0.0.1@ is octal @8.0.0.1@ and @0127.0.0.1@ is octal @87.0.0.1@ (both public,
not blocked) -- matching the resolver rather than a decimal misreading. A stricter
parser that rejected these spellings would let an octal\/hex spelling of an
internal address skip the block and reach the resolving fetch as a name, silently
__narrowing__ the SSRF gate.

Two boundaries are deliberately not modelled here; such a host is simply treated as a
name, which the host allowlist constrains. First, the __short__ @inet_aton@ forms with
fewer than four parts (a bare 32-bit number @2130706433@ \/ @0x7f000001@, or a @127.1@)
are not literals here. Second, a malformed octet (an invalid-octal @08@, where 8 is not
an octal digit, or an overflowing @0400@\/@256@\/@0x100@) is not a literal, exactly as a
resolver rejects it. A malformed IPv6 group that overflows 16 bits (@fe80::1ffff@) is
likewise not a literal here. Delegating literal /parsing/ to a library would change this
lenient/strict boundary, so it is kept here.
-}
parseIpLiteral :: Text -> Maybe IpAddr
parseIpLiteral :: Text -> Maybe IpAddr
parseIpLiteral Text
host = case Text -> Maybe (Char, Text)
T.uncons Text
host of
    Maybe (Char, Text)
Nothing -> Maybe IpAddr
forall a. Maybe a
Nothing -- empty host: not a literal
    Just (Char, Text)
_ -> if (Char -> Bool) -> Text -> Bool
T.any (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
':') Text
host then Text -> Maybe IpAddr
parseIPv6 Text
host else (Text -> Maybe Word8) -> Text -> Maybe IpAddr
parseIPv4 Text -> Maybe Word8
octetInetAton Text
host

{- Parse a four-part dotted-quad @a.b.c.d@ into its octets, each coerced to @0..255@
by the supplied octet parser. The top-level host literal passes the
@inet_aton@-faithful 'octetInetAton' (leading-zero octal and @0x@ hex), and the
embedded IPv4-in-IPv6 form passes the strict-decimal 'octetDecimal'; only the
four-part form is recognised (see 'parseIpLiteral' for the short forms treated as names).
-}
parseIPv4 :: (Text -> Maybe Word8) -> Text -> Maybe IpAddr
parseIPv4 :: (Text -> Maybe Word8) -> Text -> Maybe IpAddr
parseIPv4 Text -> Maybe Word8
octet Text
host = case HasCallStack => Text -> Text -> [Text]
Text -> Text -> [Text]
T.splitOn Text
"." Text
host of
    [Text
a, Text
b, Text
c, Text
d] -> Word8 -> Word8 -> Word8 -> Word8 -> IpAddr
IpV4 (Word8 -> Word8 -> Word8 -> Word8 -> IpAddr)
-> Maybe Word8 -> Maybe (Word8 -> Word8 -> Word8 -> IpAddr)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Maybe Word8
octet Text
a Maybe (Word8 -> Word8 -> Word8 -> IpAddr)
-> Maybe Word8 -> Maybe (Word8 -> Word8 -> IpAddr)
forall a b. Maybe (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Text -> Maybe Word8
octet Text
b Maybe (Word8 -> Word8 -> IpAddr)
-> Maybe Word8 -> Maybe (Word8 -> IpAddr)
forall a b. Maybe (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Text -> Maybe Word8
octet Text
c Maybe (Word8 -> IpAddr) -> Maybe Word8 -> Maybe IpAddr
forall a b. Maybe (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Text -> Maybe Word8
octet Text
d
    [Text]
_ -> Maybe IpAddr
forall a. Maybe a
Nothing

{- An IPv4 octet under @inet_aton@'s per-part base rules -- the coercion a libc
resolver ('getAddrInfo') applies, so the internal-range block tests the address a
resolver would actually dial. A @0x@\/@0X@ prefix is hexadecimal, a leading @0@
(with at least one more digit) is octal, and anything else is decimal; the parsed
value must still fit @0..255@, so an overflowing part (@0400@ = 256, @0x100@ = 256)
is rejected exactly as a resolver rejects it. The base-digit check keeps 'readMaybe'
from accepting signs or whitespace and rejects a digit outside the chosen base (the
@8@ in @08@ is not octal), so such a spelling is not a literal -- matching glibc,
which refuses it rather than coercing it.
-}
octetInetAton :: Text -> Maybe Word8
octetInetAton :: Text -> Maybe Word8
octetInetAton Text
tok = do
    n <- Maybe Integer
value
    if n <= 255 then Just (fromInteger n) else Nothing
  where
    value :: Maybe Integer
    value :: Maybe Integer
value = case Text -> Maybe (Char, Text)
T.uncons Text
tok of
        Just (Char
'0', Text
rest)
            | Text -> Text
T.toLower (Int -> Text -> Text
T.take Int
1 Text
rest) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"x" ->
                let hex :: Text
hex = Int -> Text -> Text
T.drop Int
1 Text
rest
                 in if Text -> Bool
isHex Text
hex then String -> Maybe Integer
forall a. Read a => String -> Maybe a
readMaybe (String
"0x" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Text -> String
forall a. ToString a => a -> String
toString Text
hex) else Maybe Integer
forall a. Maybe a
Nothing
            | Bool -> Bool
not (Text -> Bool
T.null Text
rest) ->
                if Text -> Bool
isOctal Text
tok then String -> Maybe Integer
forall a. Read a => String -> Maybe a
readMaybe (String
"0o" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Text -> String
forall a. ToString a => a -> String
toString Text
tok) else Maybe Integer
forall a. Maybe a
Nothing
        Maybe (Char, Text)
_ -> if Text -> Bool
isDecimal Text
tok then String -> Maybe Integer
forall a. Read a => String -> Maybe a
readMaybe (Text -> String
forall a. ToString a => a -> String
toString Text
tok) else Maybe Integer
forall a. Maybe a
Nothing

{- An IPv4 octet as a non-empty all-decimal run in @0..255@: the strict spelling
used inside an IPv4-in-IPv6 literal (@::ffff:a.b.c.d@), where the embedded form is
not subject to @inet_aton@'s base coercion. The digit check keeps 'readMaybe' from
accepting signs\/whitespace, so a parsed value is >= 0.
-}
octetDecimal :: Text -> Maybe Word8
octetDecimal :: Text -> Maybe Word8
octetDecimal Text
t = do
    n <- if Text -> Bool
isDecimal Text
t then String -> Maybe Integer
forall a. Read a => String -> Maybe a
readMaybe (Text -> String
forall a. ToString a => a -> String
toString Text
t) else Maybe Integer
forall a. Maybe a
Nothing :: Maybe Integer
    if n <= 255 then Just (fromInteger n) else Nothing

{- Parse an IPv6 literal -- either the full eight-group form or a @::@-compressed
form (at most one @::@), optionally ending in an embedded dotted-quad IPv4 -- into
its eight 16-bit groups. Enough to recognise the @::1@, @fe80::\/10@, and
@::ffff:0:0\/96@ addresses we block; rejects anything malformed.
-}
parseIPv6 :: Text -> Maybe IpAddr
parseIPv6 :: Text -> Maybe IpAddr
parseIPv6 Text
host = case HasCallStack => Text -> Text -> [Text]
Text -> Text -> [Text]
T.splitOn Text
"::" Text
host of
    [Text
single] -> [Word16] -> Maybe IpAddr
exactlyEightGroups ([Word16] -> Maybe IpAddr) -> Maybe [Word16] -> Maybe IpAddr
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Text -> Maybe [Word16]
parseV6Side Text
single
    [Text
before, Text
after] -> do
        hd <- Text -> Maybe [Word16]
parseV6Side Text
before
        tl <- parseV6Side after
        expandCompressedV6 hd tl
    [Text]
_ -> Maybe IpAddr
forall a. Maybe a
Nothing -- more than one "::" is illegal

{- The colon-separated groups of one side of the @::@; "" → no groups. The final
token may be a dotted-quad IPv4 (RFC 4291 §2.2.3, e.g. the @169.254.169.254@ in
@::ffff:169.254.169.254@), which expands to its two 16-bit groups so an
IPv4-mapped literal in its canonical dotted form is decoded rather than
mistaken for a name. Only the last token may be dotted; an interior dotted
token fails 'parseV6Group' (no hex '.') and the whole parse is rejected.
-}
parseV6Side :: Text -> Maybe [Word16]
parseV6Side :: Text -> Maybe [Word16]
parseV6Side Text
t
    | Text -> Bool
T.null Text
t = [Word16] -> Maybe [Word16]
forall a. a -> Maybe a
Just []
    | Bool
otherwise = [Text] -> Maybe [Word16]
parseV6Tokens (HasCallStack => Text -> Text -> [Text]
Text -> Text -> [Text]
T.splitOn Text
":" Text
t)

parseV6Tokens :: [Text] -> Maybe [Word16]
parseV6Tokens :: [Text] -> Maybe [Word16]
parseV6Tokens [] = [Word16] -> Maybe [Word16]
forall a. a -> Maybe a
Just []
parseV6Tokens [Text
tok]
    | (Char -> Bool) -> Text -> Bool
T.any (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'.') Text
tok = Text -> Maybe [Word16]
parseEmbeddedV4 Text
tok
    | Bool
otherwise = (Word16 -> [Word16] -> [Word16]
forall a. a -> [a] -> [a]
: []) (Word16 -> [Word16]) -> Maybe Word16 -> Maybe [Word16]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Maybe Word16
parseV6Group Text
tok
parseV6Tokens (Text
tok : [Text]
rest) = (:) (Word16 -> [Word16] -> [Word16])
-> Maybe Word16 -> Maybe ([Word16] -> [Word16])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Maybe Word16
parseV6Group Text
tok Maybe ([Word16] -> [Word16]) -> Maybe [Word16] -> Maybe [Word16]
forall a b. Maybe (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> [Text] -> Maybe [Word16]
parseV6Tokens [Text]
rest

-- A trailing dotted-quad IPv4 as its two 16-bit groups (high pair, low pair).
parseEmbeddedV4 :: Text -> Maybe [Word16]
parseEmbeddedV4 :: Text -> Maybe [Word16]
parseEmbeddedV4 Text
t = case (Text -> Maybe Word8) -> Text -> Maybe IpAddr
parseIPv4 Text -> Maybe Word8
octetDecimal Text
t of
    Just (IpV4 Word8
a Word8
b Word8
c Word8
d) -> [Word16] -> Maybe [Word16]
forall a. a -> Maybe a
Just [Word8 -> Word8 -> Word16
forall {a} {a} {a}. (Integral a, Integral a, Num a) => a -> a -> a
pair Word8
a Word8
b, Word8 -> Word8 -> Word16
forall {a} {a} {a}. (Integral a, Integral a, Num a) => a -> a -> a
pair Word8
c Word8
d]
    Maybe IpAddr
_ -> Maybe [Word16]
forall a. Maybe a
Nothing
  where
    pair :: a -> a -> a
pair a
hi a
lo = a -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
hi a -> a -> a
forall a. Num a => a -> a -> a
* a
256 a -> a -> a
forall a. Num a => a -> a -> a
+ a -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
lo

{- A group is a non-empty all-hex run that fits in 16 bits. The hex check
keeps 'readMaybe' from accepting signs, so a parsed value is >= 0.
-}
parseV6Group :: Text -> Maybe Word16
parseV6Group :: Text -> Maybe Word16
parseV6Group Text
t = do
    n <- if Text -> Bool
isHex Text
t then String -> Maybe Integer
forall a. Read a => String -> Maybe a
readMaybe (String
"0x" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Text -> String
forall a. ToString a => a -> String
toString Text
t) else Maybe Integer
forall a. Maybe a
Nothing :: Maybe Integer
    if n <= 0xFFFF then Just (fromInteger n) else Nothing

{- Fill the compressed form's zero run: "::" stands for at least one all-zero
group, so the explicit groups on either side must total at most 7 (leaving room
to fill to 8).
-}
expandCompressedV6 :: [Word16] -> [Word16] -> Maybe IpAddr
expandCompressedV6 :: [Word16] -> [Word16] -> Maybe IpAddr
expandCompressedV6 [Word16]
hd [Word16]
tl =
    let present :: Int
present = [Word16] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Word16]
hd Int -> Int -> Int
forall a. Num a => a -> a -> a
+ [Word16] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Word16]
tl
     in if Int
present Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
7
            then IpAddr -> Maybe IpAddr
forall a. a -> Maybe a
Just ([Word16] -> IpAddr
IpV6 ([Word16]
hd [Word16] -> [Word16] -> [Word16]
forall a. Semigroup a => a -> a -> a
<> Int -> Word16 -> [Word16]
forall a. Int -> a -> [a]
replicate (Int
8 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
present) Word16
0 [Word16] -> [Word16] -> [Word16]
forall a. Semigroup a => a -> a -> a
<> [Word16]
tl))
            else Maybe IpAddr
forall a. Maybe a
Nothing

-- Exactly the full eight-group form; anything else is malformed.
exactlyEightGroups :: [Word16] -> Maybe IpAddr
exactlyEightGroups :: [Word16] -> Maybe IpAddr
exactlyEightGroups gs :: [Word16]
gs@[Word16
_, Word16
_, Word16
_, Word16
_, Word16
_, Word16
_, Word16
_, Word16
_] = IpAddr -> Maybe IpAddr
forall a. a -> Maybe a
Just ([Word16] -> IpAddr
IpV6 [Word16]
gs)
exactlyEightGroups [Word16]
_ = Maybe IpAddr
forall a. Maybe a
Nothing

-- Whether @t@ is a non-empty run of decimal digits (no sign or whitespace).
isDecimal :: Text -> Bool
isDecimal :: Text -> Bool
isDecimal Text
t = Bool -> Bool
not (Text -> Bool
T.null Text
t) Bool -> Bool -> Bool
&& (Char -> Bool) -> Text -> Bool
T.all (Char -> String -> Bool
forall (f :: * -> *) a.
(Foldable f, DisallowElem f, Eq a) =>
a -> f a -> Bool
`elem` [Char
'0' .. Char
'9']) Text
t

-- Whether @t@ is a non-empty run of octal digits (0..7).
isOctal :: Text -> Bool
isOctal :: Text -> Bool
isOctal Text
t = Bool -> Bool
not (Text -> Bool
T.null Text
t) Bool -> Bool -> Bool
&& (Char -> Bool) -> Text -> Bool
T.all (Char -> String -> Bool
forall (f :: * -> *) a.
(Foldable f, DisallowElem f, Eq a) =>
a -> f a -> Bool
`elem` [Char
'0' .. Char
'7']) Text
t

-- Whether @t@ is a non-empty run of hexadecimal digits.
isHex :: Text -> Bool
isHex :: Text -> Bool
isHex Text
t = Bool -> Bool
not (Text -> Bool
T.null Text
t) Bool -> Bool -> Bool
&& (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isHexDigit Text
t
  where
    isHexDigit :: Char -> Bool
isHexDigit Char
c = Char
c Char -> String -> Bool
forall (f :: * -> *) a.
(Foldable f, DisallowElem f, Eq a) =>
a -> f a -> Bool
`elem` ([Char
'0' .. Char
'9'] String -> ShowS
forall a. Semigroup a => a -> a -> a
<> [Char
'a' .. Char
'f'] String -> ShowS
forall a. Semigroup a => a -> a -> a
<> [Char
'A' .. Char
'F'])

{- | Whether a tarball may be fetched from a host that differs from the upstream
that served the packument.

An upstream's @dist.tarball@ is server-chosen data (see
@docs\/architecture\/security.md@ → "Why @dist.tarball@ is honoured"), so a
compromised or hostile upstream can name __any__ host as the artifact location.
This policy bounds the axis of that risk the host allowlist leaves open: /where/ the
bytes are fetched. Even an allowlisted-but-/different/ host is a wider fetch surface than
the packument's own source, and the safe reading of the allowlist is "same source unless
told otherwise".
-}
data TarballHostPolicy
    = {- | The secure default: a tarball is fetched only from the __same__ host
      that served the packument; a @dist.tarball@ on any other host is refused,
      even one otherwise on the allowlist.
      -}
      SameHostAsPackument
    | {- | The opt-in: a tarball may be fetched from __any allowlisted__ host (for a
      registry that legitimately serves artifacts from a separate CDN\/files host).
      This widens the fetch surface to the whole allowlist; it never escapes it or
      the internal-range block.
      -}
      AnyAllowlistedHost
    deriving stock (TarballHostPolicy -> TarballHostPolicy -> Bool
(TarballHostPolicy -> TarballHostPolicy -> Bool)
-> (TarballHostPolicy -> TarballHostPolicy -> Bool)
-> Eq TarballHostPolicy
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: TarballHostPolicy -> TarballHostPolicy -> Bool
== :: TarballHostPolicy -> TarballHostPolicy -> Bool
$c/= :: TarballHostPolicy -> TarballHostPolicy -> Bool
/= :: TarballHostPolicy -> TarballHostPolicy -> Bool
Eq, Int -> TarballHostPolicy -> ShowS
[TarballHostPolicy] -> ShowS
TarballHostPolicy -> String
(Int -> TarballHostPolicy -> ShowS)
-> (TarballHostPolicy -> String)
-> ([TarballHostPolicy] -> ShowS)
-> Show TarballHostPolicy
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> TarballHostPolicy -> ShowS
showsPrec :: Int -> TarballHostPolicy -> ShowS
$cshow :: TarballHostPolicy -> String
show :: TarballHostPolicy -> String
$cshowList :: [TarballHostPolicy] -> ShowS
showList :: [TarballHostPolicy] -> ShowS
Show)

{- | The trust of the origin a @dist.tarball@ is being served from: the
operator-configured private upstream is 'TrustedOrigin', and the public upstream,
together with every artifact location an attacker could influence, is 'UntrustedOrigin'.

The distinction governs the __literal internal-range block__ alone (the cheap pure
defence-in-depth on the host gate). The trusted private origin is deliberately exempt
from it: a private registry may legitimately live on an internal address, and only an
untrusted target can be steered there. It never relaxes the host allowlist or the
same-host clause, which gate both origins identically, so a trusted origin's
@dist.tarball@ is still constrained to its own allowlisted host.
-}
data Origin
    = -- | The operator-configured private upstream: exempt from the literal internal-range block.
      TrustedOrigin
    | -- | The public upstream, and any attacker-influenceable target: subject to the literal internal-range block.
      UntrustedOrigin
    deriving stock (Origin -> Origin -> Bool
(Origin -> Origin -> Bool)
-> (Origin -> Origin -> Bool) -> Eq Origin
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Origin -> Origin -> Bool
== :: Origin -> Origin -> Bool
$c/= :: Origin -> Origin -> Bool
/= :: Origin -> Origin -> Bool
Eq, Int -> Origin -> ShowS
[Origin] -> ShowS
Origin -> String
(Int -> Origin -> ShowS)
-> (Origin -> String) -> ([Origin] -> ShowS) -> Show Origin
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Origin -> ShowS
showsPrec :: Int -> Origin -> ShowS
$cshow :: Origin -> String
show :: Origin -> String
$cshowList :: [Origin] -> ShowS
showList :: [Origin] -> ShowS
Show)

{- | Whether a @dist.tarball@ host may be fetched, given the origin's trust, the
policy, the host that served the packument, and the configured guards.

This is the policy half of the @dist.tarball@ defence; it never replaces the host
allowlist or the literal internal-range block but composes /on top/ of them, so the
answer is the conjunction of three independent checks and over-blocking is the
fail-safe:

* the @tarballHost@ must be on the host allowlist (@allowed@), as every outbound
  target is: a @dist.tarball@ host off the allowlist is refused regardless of
  policy;
* it must not be an internal-address literal (the fixed range set plus the
  operator-configured @additionalBlockedRanges@), the cheap pure defence-in-depth,
  but a 'TrustedOrigin' is __exempt__ from this clause (see 'Origin'); and
* under 'SameHostAsPackument' (the secure default) it must additionally __equal__
  the @packumentHost@ (the host that served the metadata), so a tarball on a
  /different/ host is refused even when that host is allowlisted. Under
  'AnyAllowlistedHost' that last clause is relaxed, leaving only the allowlist and
  (origin-aware) internal-range checks.

The allowlist and same-host clauses gate __both__ origins identically; only the
internal-range clause is origin-aware, so a 'TrustedOrigin' is never let past its own
allowlisted host or onto a /different/ host than its metadata under the default.

Hosts are compared by their canonical key (case-folded, and for an IP-literal the
single canonical literal; see 'canonicalHostKey'), as the host guards are. An
empty @tarballHost@ is never allowed (the allowlist already refuses it). The
@packumentHost@ is the bare host the metadata was fetched from (extract it with
'hostAddress'); only its equality to @tarballHost@ matters, so it need not itself
be re-validated here: it was already gated when the packument was fetched.
-}
tarballHostAllowed ::
    Origin ->
    TarballHostPolicy ->
    -- | The host allowlist (the same one every outbound fetch is gated by).
    LoweredHostSet ->
    {- | The operator-configured ranges extending the fixed internal-range block
    (untrusted origin).
    -}
    [IPRange] ->
    -- | The bare host that served the packument.
    Text ->
    -- | The bare host of the candidate @dist.tarball@.
    Text ->
    Bool
tarballHostAllowed :: Origin
-> TarballHostPolicy
-> LoweredHostSet
-> [IPRange]
-> Text
-> Text
-> Bool
tarballHostAllowed Origin
origin TarballHostPolicy
policy LoweredHostSet
allowed [IPRange]
additionalBlockedRanges Text
packumentHost Text
tarballHost =
    LoweredHostSet -> Text -> Bool
isAllowedUpstreamHost LoweredHostSet
allowed Text
tarballHost
        Bool -> Bool -> Bool
&& Bool
internalRangeOk
        Bool -> Bool -> Bool
&& case TarballHostPolicy
policy of
            TarballHostPolicy
SameHostAsPackument -> Text -> Text
canonicalHostKey Text
tarballHost Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> Text
canonicalHostKey Text
packumentHost
            TarballHostPolicy
AnyAllowlistedHost -> Bool
True
  where
    -- The literal internal-range block is origin-aware: the trusted private origin is
    -- exempt, the untrusted origin is gated against the fixed set plus the operator's
    -- additional ranges.
    internalRangeOk :: Bool
    internalRangeOk :: Bool
internalRangeOk = case Origin
origin of
        Origin
TrustedOrigin -> Bool
True
        Origin
UntrustedOrigin -> Bool -> Bool
not ([IPRange] -> Text -> Bool
isBlockedTarget [IPRange]
additionalBlockedRanges Text
tarballHost)

{- | The mount-constant inputs to the per-request 'tarballHostAllowed' gate, extracted
__once__ from a mount's three configured upstream URLs so the serve path parses no URL
and builds no host set per request.

The serve-path tarball gate is on the hot artifact path (every private hit and every
public leg runs it), yet its allowlist and the private\/public upstream hosts never
change after boot -- they are fixed by the mount's configuration. Recovering them from
the base URLs on each request rebuilt a 'LoweredHostSet' and re-ran 'hostAddress' several
times per artifact; precomputing them here into a 'TarballHostGate' collapses that to a
few field reads. The only genuinely per-request host is the dynamic public
@dist.tarball@, still parsed at the call site.
-}
data TarballHostGate = TarballHostGate
    { TarballHostGate -> LoweredHostSet
thgAllowlist :: LoweredHostSet
    {- ^ The lowered allowlist of the mount's configured upstream hosts (public, private,
    and mirror target) -- the same set every outbound fetch is gated against
    (security.md invariant 2).
    -}
    , TarballHostGate -> Text
thgPrivateHost :: Text
    -- ^ The bare host of the private upstream, extracted once.
    , TarballHostGate -> Text
thgPublicHost :: Text
    -- ^ The bare host of the public upstream, extracted once.
    }
    deriving stock (TarballHostGate -> TarballHostGate -> Bool
(TarballHostGate -> TarballHostGate -> Bool)
-> (TarballHostGate -> TarballHostGate -> Bool)
-> Eq TarballHostGate
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: TarballHostGate -> TarballHostGate -> Bool
== :: TarballHostGate -> TarballHostGate -> Bool
$c/= :: TarballHostGate -> TarballHostGate -> Bool
/= :: TarballHostGate -> TarballHostGate -> Bool
Eq, Int -> TarballHostGate -> ShowS
[TarballHostGate] -> ShowS
TarballHostGate -> String
(Int -> TarballHostGate -> ShowS)
-> (TarballHostGate -> String)
-> ([TarballHostGate] -> ShowS)
-> Show TarballHostGate
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> TarballHostGate -> ShowS
showsPrec :: Int -> TarballHostGate -> ShowS
$cshow :: TarballHostGate -> String
show :: TarballHostGate -> String
$cshowList :: [TarballHostGate] -> ShowS
showList :: [TarballHostGate] -> ShowS
Show)

{- | Build the 'TarballHostGate' from a mount's private, public, and mirror-target
upstream URLs: the allowlist is the lowered set of their bare hosts, and the private and
public hosts are each extracted once with 'hostAddress'. Called once per mount at the
composition root (and by test fixtures); the result is carried on the serve
dependencies so the per-request gate reads fields rather than re-parsing URLs.
-}
tarballHostGate :: Text -> Text -> Text -> TarballHostGate
tarballHostGate :: Text -> Text -> Text -> TarballHostGate
tarballHostGate Text
privateUrl Text
publicUrl Text
mirrorUrl =
    TarballHostGate
        { thgAllowlist :: LoweredHostSet
thgAllowlist = Set Text -> LoweredHostSet
lowerCaseHosts ([Text] -> Set Text
forall a. Ord a => [a] -> Set a
Set.fromList [Text
privateHost, Text
publicHost, Text -> Text
hostAddress Text
mirrorUrl])
        , thgPrivateHost :: Text
thgPrivateHost = Text
privateHost
        , thgPublicHost :: Text
thgPublicHost = Text
publicHost
        }
  where
    privateHost :: Text
privateHost = Text -> Text
hostAddress Text
privateUrl
    publicHost :: Text
publicHost = Text -> Text
hostAddress Text
publicUrl