{- | The integrity gate is the security crux of the worker.

A mirrored artifact is later served from the private upstream __without re-running
the rules__, so a corrupt or tampered artifact must never enter it. Verification is
therefore the gate: a hash __mismatch fails the job with no publish__ and is logged
loudly. Because the digest is the __serve-time-admitted__ one carried on the job,
the worker mirrors exactly the bytes the rules cleared -- an upstream packument
mutated in the enqueue → process window cannot substitute a different artifact.
-}
module Ecluse.Core.Worker.Integrity (
    IntegrityResult (..),
    verifyIntegrity,
) where

import Data.ByteArray.Encoding (Base (Base16, Base64), convertToBase)
import Data.Foldable (maximumBy)
import Data.Text qualified as T

import Ecluse.Core.Package (Hash (hashAlg, hashValue), HashAlg (SHA256, SRI), computeDigest)
import Ecluse.Core.Package.Integrity (Strength, assertedAlg, integrityStrength, sriBody, sriPrefix)

{- | The result of verifying fetched bytes against the admitted integrity digests.
A sum type, not a 'Bool', so the mismatch carries the detail an operator needs to
explain why a publish was refused.
-}
data IntegrityResult
    = -- | The bytes matched the most authoritative admitted digest.
      IntegrityVerified
    | {- | The bytes failed the integrity gate. Carries a human-readable detail (the
      digest they were checked against, or that the strongest one was uncomputable).
      -}
      IntegrityMismatch Text
    deriving stock (IntegrityResult -> IntegrityResult -> Bool
(IntegrityResult -> IntegrityResult -> Bool)
-> (IntegrityResult -> IntegrityResult -> Bool)
-> Eq IntegrityResult
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: IntegrityResult -> IntegrityResult -> Bool
== :: IntegrityResult -> IntegrityResult -> Bool
$c/= :: IntegrityResult -> IntegrityResult -> Bool
/= :: IntegrityResult -> IntegrityResult -> Bool
Eq, Int -> IntegrityResult -> ShowS
[IntegrityResult] -> ShowS
IntegrityResult -> String
(Int -> IntegrityResult -> ShowS)
-> (IntegrityResult -> String)
-> ([IntegrityResult] -> ShowS)
-> Show IntegrityResult
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> IntegrityResult -> ShowS
showsPrec :: Int -> IntegrityResult -> ShowS
$cshow :: IntegrityResult -> String
show :: IntegrityResult -> String
$cshowList :: [IntegrityResult] -> ShowS
showList :: [IntegrityResult] -> ShowS
Show)

{- | Verify fetched artifact bytes against the __most authoritative__ integrity
digest the version carries -- never against a weaker one while a stronger is present.

A real npm version carries both a modern SRI @sha512@ digest and the legacy SHA-1
@shasum@. Passing on /any/ match would let an artifact that matches the weak SHA-1
but fails the strong @sha512@ through -- and SHA-1 collision resistance is broken, so
that is exploitable. So the gate ranks the admitted digests by algorithm authority
(strongest first: @sha512@ \/ @blake2b@ > @sha384@ > @sha256@ > @sha1@ > @md5@), and
checks the bytes against the strongest one present: the bytes pass __iff__ that digest
matches.
A weaker digest can neither override nor rescue a failed strong one.

The bytes are recomputed in the strongest digest's own algorithm through the shared
'Ecluse.Core.Package.computeDigest', the one definition of which algorithms Écluse can
verify. That computable set covers every algorithm the public integrity floor admits, so an
admitted artifact is always verifiable here. If the strongest digest is nonetheless in an
algorithm 'computeDigest' declines (MD5, a forgeable hash) or an SRI whose inner algorithm
does not resolve, the gate __fails closed__ rather than falling back to a weaker digest: a
tampered artifact must never be admitted on the strength of a hash an attacker could forge.

This is the tamper gate before a publish: a mismatch fails the job and never
publishes a corrupt or substituted artifact into the private upstream.

>>> import Ecluse.Core.Package (mkHash, HashAlg (SHA1))
>>> fmap (\h -> verifyIntegrity (h :| []) "Hello World") (mkHash SHA1 "0a4d55a8d778e5022fab701977c5d840bbc486d0")
Right IntegrityVerified

>>> fmap (\h -> verifyIntegrity (h :| []) "Hello World") (mkHash SHA1 "da39a3ee5e6b4b0d3255bfef95601890afd80709")
Right (IntegrityMismatch "the SHA1 digest did not match the fetched bytes")
-}
verifyIntegrity :: NonEmpty Hash -> ByteString -> IntegrityResult
verifyIntegrity :: NonEmpty Hash -> ByteString -> IntegrityResult
verifyIntegrity NonEmpty Hash
hashes ByteString
bytes =
    let strongest :: Hash
strongest = (Hash -> Hash -> Ordering) -> NonEmpty Hash -> Hash
forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
maximumBy ((Hash -> Strength) -> Hash -> Hash -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing Hash -> Strength
authority) NonEmpty Hash
hashes
     in case LByteString -> Hash -> Maybe Bool
matchesDigest (ByteString -> LByteString
forall l s. LazyStrict l s => s -> l
toLazy ByteString
bytes) Hash
strongest of
            Maybe Bool
Nothing ->
                -- Fail closed: the strongest present digest is in an algorithm we
                -- cannot recompute, so we cannot prove the bytes -- never drop to a
                -- weaker digest an attacker could forge.
                Text -> IntegrityResult
IntegrityMismatch
                    ( Text
"the strongest admitted digest ("
                        Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Hash -> Text
describeDigest Hash
strongest
                        Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
") is in an algorithm the worker cannot verify"
                    )
            Just Bool
True -> IntegrityResult
IntegrityVerified
            Just Bool
False ->
                Text -> IntegrityResult
IntegrityMismatch (Text
"the " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Hash -> Text
describeDigest Hash
strongest Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" digest did not match the fetched bytes")

-- Algorithm authority, strongest first, so 'maximumBy' selects the digest a match
-- must be proven against. It reuses the shared 'integrityStrength' ranking so the
-- tamper gate and the serve-admission floor agree on which algorithms are strong.
-- An SRI is ranked by the algorithm it asserts ('assertedAlg' -- npm's @sha512-…@
-- ranks as 'SHA512'); an SRI whose inner alg is unrecognised asserts nothing and ranks
-- at the SHA-256 floor tier (above the legacy SHA-1/MD5). It therefore WINS the
-- 'maximumBy' and, unresolvable, the gate fails closed in 'matchesDigest' rather than
-- downgrading to a weaker computable digest an attacker who also controls it could
-- forge; it stays below a computable sha512, so a real sha512, when co-present, is
-- still preferred and verified.
authority :: Hash -> Strength
authority :: Hash -> Strength
authority = Strength -> (HashAlg -> Strength) -> Maybe HashAlg -> Strength
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (HashAlg -> Strength
integrityStrength HashAlg
SHA256) HashAlg -> Strength
integrityStrength (Maybe HashAlg -> Strength)
-> (Hash -> Maybe HashAlg) -> Hash -> Strength
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Hash -> Maybe HashAlg
assertedAlg

-- Whether the fetched bytes match the chosen digest: resolve its algorithm
-- ('assertedAlg', 'Nothing' for an unresolvable SRI), recompute the bytes in that
-- algorithm ('computeDigest', 'Nothing' for one the worker will not verify against),
-- and compare in the digest's own wire encoding. A hex tag compares case-insensitively
-- (hex is); an SRI's base64 body compares case-sensitively (base64 is; folding its case
-- would admit a digest that matches the bytes only after a case change). Either 'Nothing'
-- is the fail-closed case in 'verifyIntegrity'.
matchesDigest :: LByteString -> Hash -> Maybe Bool
matchesDigest :: LByteString -> Hash -> Maybe Bool
matchesDigest LByteString
lazyBytes Hash
h = do
    alg <- Hash -> Maybe HashAlg
assertedAlg Hash
h
    digestOf <- computeDigest alg
    let digest = LByteString -> ByteString
digestOf LByteString
lazyBytes
    pure $ case hashAlg h of
        HashAlg
SRI -> ByteString -> Text
base64 ByteString
digest Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> Text
sriBody (Hash -> Text
hashValue Hash
h)
        HashAlg
_ -> ByteString -> Text
hexLower ByteString
digest Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> Text
T.toLower (Hash -> Text
hashValue Hash
h)

-- Name a digest for the mismatch detail: the SRI prefix for an SRI, the
-- algorithm otherwise.
describeDigest :: Hash -> Text
describeDigest :: Hash -> Text
describeDigest Hash
h = case Hash -> HashAlg
hashAlg Hash
h of
    HashAlg
SRI -> Text
"SRI " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> Text
sriPrefix (Hash -> Text
hashValue Hash
h)
    HashAlg
alg -> HashAlg -> Text
forall b a. (Show a, IsString b) => a -> b
show HashAlg
alg

-- The lower-cased hex encoding of raw digest bytes (matching npm's hex shasum form).
hexLower :: ByteString -> Text
hexLower :: ByteString -> Text
hexLower ByteString
d = Text -> Text
T.toLower (ByteString -> Text
forall a b. ConvertUtf8 a b => b -> a
decodeUtf8 (Base -> ByteString -> ByteString
forall bin bout.
(ByteArrayAccess bin, ByteArray bout) =>
Base -> bin -> bout
convertToBase Base
Base16 ByteString
d :: ByteString))

-- The standard-base64 encoding of raw digest bytes (matching the SRI @<base64>@ body).
base64 :: ByteString -> Text
base64 :: ByteString -> Text
base64 ByteString
d = ByteString -> Text
forall a b. ConvertUtf8 a b => b -> a
decodeUtf8 (Base -> ByteString -> ByteString
forall bin bout.
(ByteArrayAccess bin, ByteArray bout) =>
Base -> bin -> bout
convertToBase Base
Base64 ByteString
d :: ByteString)