{- | The npm realization of the serve-path read operations: fetch a package's full
packument and project it into the domain manifest, reporting every failure as a typed
'MetadataError'.

npm satisfies both serve-path needs from the /same/ full-packument endpoint: the
publish-age rules require the packument's @time@ map, which npm exposes only in the
full form, so even the single-version need fetches the full bytes. This module owns the
npm side of both serve-path operations -- the fetch and the projection -- while the cache,
metrics, and single-version cache topology are wired around them by the serve layer
("Ecluse.Core.Server.Metadata"), which is where the cross-cutting caching policy belongs:

  * 'fetchNpmManifest' \/ 'projectNpmManifest' back the full-manifest operation. The
    projection is the sequence the serve path has always applied to a fetched packument --
    decode, bound the nesting depth, project and validate the self-reported name, bound the
    version count -- re-expressed as a total 'Either' so the serve path maps each cause onto
    a response rather than catching a typed throw.

  * 'fetchNpmVersion' \/ 'projectNpmVersion' back the single-version operation. The full
    bytes are still fetched (npm carries @time@ only in the full form), but they are parsed
    __selectively__ ("Ecluse.Core.Registry.Npm.SelectiveDecode"): only the requested
    version's object and @time@ entry are materialised, the others skipped unallocated, so
    a cold tarball gate no longer pays a whole-packument decode to consult one version. The
    selected version is projected through the /same/ per-version code the full path runs, so
    its 'Ecluse.Core.Package.PackageDetails' is identical to selecting it out of a full
    projection -- the optimization the stable boundary was always meant to admit.
-}
module Ecluse.Core.Registry.Npm.Metadata (
    -- * npm full-manifest fetch
    fetchNpmManifest,

    -- * npm single-version fetch
    fetchNpmVersion,

    -- * Pure projection
    projectNpmManifest,
    projectNpmVersion,
) where

import Data.Aeson (Value, eitherDecodeStrict, parseJSON)
import Data.Aeson.Types (parseMaybe)
import Data.Time (UTCTime)
import UnliftIO.Exception (handle)

import Ecluse.Core.Ecosystem (Ecosystem (Npm))
import Ecluse.Core.Package (
    PackageDetails,
    PackageInfo,
    PackageName,
    renderPackageName,
 )
import Ecluse.Core.Registry (RegistryResponse (responseBody))
import Ecluse.Core.Registry.Metadata (
    Manifest (Manifest, manifestDigest, manifestInfo, manifestRaw),
    MetadataError (MetadataBoundExceeded, MetadataNameMismatch, MetadataUndecodable),
    digestOf,
 )
import Ecluse.Core.Registry.Npm (
    NpmClientConfig (npmBaseUrl, npmLimits),
    ResponseBoundExceeded (ResponseBoundExceeded),
    fetchMetadataForm,
 )
import Ecluse.Core.Registry.Npm.Project (
    Projection (NameMismatch, Projected),
    enforceTarballScheme,
    enforceTarballSchemeDetails,
    parsePackageInfoFromValue,
    projectName,
    projectVersionEntry,
 )
import Ecluse.Core.Registry.Npm.Request (
    MetadataForm (Full),
    noValidators,
 )
import Ecluse.Core.Registry.Npm.SelectiveDecode (
    SelectedVersion (svName, svTime, svVersion, svVersionCount),
    SelectiveError (SelectiveTooDeeplyNested, SelectiveUndecodable),
    selectVersionFromPackument,
 )
import Ecluse.Core.Security (
    LimitError (TooDeeplyNested, TooManyVersions),
    Limits,
    checkNestingDepth,
    checkVersionCount,
    maxNestingDepth,
    maxVersionCount,
 )
import Ecluse.Core.Telemetry.Span (TracingPort (spanMetadataDecode, spanMetadataFetch))
import Ecluse.Core.Version (Version, mkVersion, renderVersion)

{- | Fetch a package's full packument and project it into a 'Manifest' (typed view,
raw document, and the wire bytes' 'ContentDigest'), or the typed 'MetadataError' for
why it could not.

The body is read bounded against the config's response budget (so an oversized upstream
is refused fail-closed before it is buffered whole); a breach surfaces as
'MetadataBoundExceeded'. A genuine transport fault is left to throw -- the serve path
already brackets the unreachable-upstream case -- so this 'Either' carries only the
parse-and-policy outcomes the serve path renders distinctly.

The digest is computed here, over the strict body the bounded read already produced:
the one place the wire bytes exist, so no later stage re-encodes the document just to
fingerprint it.
-}
fetchNpmManifest :: TracingPort -> NpmClientConfig -> PackageName -> IO (Either MetadataError Manifest)
fetchNpmManifest :: TracingPort
-> NpmClientConfig
-> PackageName
-> IO (Either MetadataError Manifest)
fetchNpmManifest TracingPort
tracing NpmClientConfig
config PackageName
name =
    (ResponseBoundExceeded -> IO (Either MetadataError Manifest))
-> IO (Either MetadataError Manifest)
-> IO (Either MetadataError Manifest)
forall (m :: * -> *) e a.
(MonadUnliftIO m, Exception e) =>
(e -> m a) -> m a -> m a
handle (\(ResponseBoundExceeded LimitError
err) -> Either MetadataError Manifest -> IO (Either MetadataError Manifest)
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (MetadataError -> Either MetadataError Manifest
forall a b. a -> Either a b
Left (LimitError -> MetadataError
MetadataBoundExceeded LimitError
err))) (IO (Either MetadataError Manifest)
 -> IO (Either MetadataError Manifest))
-> IO (Either MetadataError Manifest)
-> IO (Either MetadataError Manifest)
forall a b. (a -> b) -> a -> b
$ do
        response <- TracingPort -> forall a. PackageName -> IO a -> IO a
spanMetadataFetch TracingPort
tracing PackageName
name (IO RegistryResponse -> IO RegistryResponse)
-> IO RegistryResponse -> IO RegistryResponse
forall a b. (a -> b) -> a -> b
$ NpmClientConfig
-> MetadataForm -> Validators -> PackageName -> IO RegistryResponse
fetchMetadataForm NpmClientConfig
config MetadataForm
Full Validators
noValidators PackageName
name
        let body = RegistryResponse -> ByteString
responseBody RegistryResponse
response
        spanMetadataDecode tracing name $
            pure
                ( manifestOf (digestOf body) . first (enforceTarballScheme (npmBaseUrl config))
                    <$> projectNpmManifest (npmLimits config) name body
                )
  where
    manifestOf :: ContentDigest -> (PackageInfo, Value) -> Manifest
manifestOf ContentDigest
digest (PackageInfo
info, Value
raw) = Manifest{manifestInfo :: PackageInfo
manifestInfo = PackageInfo
info, manifestRaw :: Value
manifestRaw = Value
raw, manifestDigest :: ContentDigest
manifestDigest = ContentDigest
digest}

{- | Project a fetched packument's bytes into @(manifest, raw document)@, applying the
serve path's response bounds and name validation. Pure and total.

The sequence -- decode to a 'Value', bound its nesting depth, project the typed
'PackageInfo' and validate its self-reported name against the request, then bound the
version count -- is the one the serve path has always run; the raw 'Value' returned is
the nesting-checked document the serve path edits in place, so the typed view and the
served bytes describe the same parse. Each refusal maps to the constructor the serve
path renders: a decode failure or an absent\/undecodable name is 'MetadataUndecodable';
a self-reported /different/ name is 'MetadataNameMismatch'; a nesting-depth or
version-count breach is 'MetadataBoundExceeded'.
-}
projectNpmManifest :: Limits -> PackageName -> ByteString -> Either MetadataError (PackageInfo, Value)
projectNpmManifest :: Limits
-> PackageName
-> ByteString
-> Either MetadataError (PackageInfo, Value)
projectNpmManifest Limits
limits PackageName
name ByteString
body = do
    value <- (String -> MetadataError)
-> Either String Value -> Either MetadataError Value
forall a b c. (a -> b) -> Either a c -> Either b c
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first (MetadataError -> String -> MetadataError
forall a b. a -> b -> a
const MetadataError
MetadataUndecodable) (ByteString -> Either String Value
forall a. FromJSON a => ByteString -> Either String a
eitherDecodeStrict ByteString
body)
    bounded <- first MetadataBoundExceeded (checkNestingDepth limits value)
    info <- case parsePackageInfoFromValue name bounded of
        Left ParseError
_ -> MetadataError -> Either MetadataError PackageInfo
forall a b. a -> Either a b
Left MetadataError
MetadataUndecodable
        Right (NameMismatch Text
reported) -> MetadataError -> Either MetadataError PackageInfo
forall a b. a -> Either a b
Left (Text -> MetadataError
MetadataNameMismatch Text
reported)
        Right (Projected PackageInfo
projected) -> PackageInfo -> Either MetadataError PackageInfo
forall a b. b -> Either a b
Right PackageInfo
projected
    boundedInfo <- first MetadataBoundExceeded (checkVersionCount limits info)
    pure (boundedInfo, bounded)

{- | Fetch a package's full packument and project __only the requested version__ into its
'PackageDetails', or the typed 'MetadataError' for why it could not -- the cheap counterpart
to 'fetchNpmManifest' for the single-version serve operation.

npm carries the @time@ map only in the full document, so the __full bytes are still
fetched__ (bounded against the config's budget, exactly as 'fetchNpmManifest'); the win is
that they are parsed __selectively__ ('projectNpmVersion'), materialising the one requested
version rather than every version. A 'Nothing' is a version genuinely absent from a sound
document (a forwarded miss); a 'MetadataError' is metadata that could not be obtained at all.
A transport fault is left to throw, as 'fetchNpmManifest'.
-}
fetchNpmVersion :: TracingPort -> NpmClientConfig -> PackageName -> Version -> IO (Either MetadataError (Maybe PackageDetails))
fetchNpmVersion :: TracingPort
-> NpmClientConfig
-> PackageName
-> Version
-> IO (Either MetadataError (Maybe PackageDetails))
fetchNpmVersion TracingPort
tracing NpmClientConfig
config PackageName
name Version
version =
    (ResponseBoundExceeded
 -> IO (Either MetadataError (Maybe PackageDetails)))
-> IO (Either MetadataError (Maybe PackageDetails))
-> IO (Either MetadataError (Maybe PackageDetails))
forall (m :: * -> *) e a.
(MonadUnliftIO m, Exception e) =>
(e -> m a) -> m a -> m a
handle (\(ResponseBoundExceeded LimitError
err) -> Either MetadataError (Maybe PackageDetails)
-> IO (Either MetadataError (Maybe PackageDetails))
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (MetadataError -> Either MetadataError (Maybe PackageDetails)
forall a b. a -> Either a b
Left (LimitError -> MetadataError
MetadataBoundExceeded LimitError
err))) (IO (Either MetadataError (Maybe PackageDetails))
 -> IO (Either MetadataError (Maybe PackageDetails)))
-> IO (Either MetadataError (Maybe PackageDetails))
-> IO (Either MetadataError (Maybe PackageDetails))
forall a b. (a -> b) -> a -> b
$ do
        response <- TracingPort -> forall a. PackageName -> IO a -> IO a
spanMetadataFetch TracingPort
tracing PackageName
name (IO RegistryResponse -> IO RegistryResponse)
-> IO RegistryResponse -> IO RegistryResponse
forall a b. (a -> b) -> a -> b
$ NpmClientConfig
-> MetadataForm -> Validators -> PackageName -> IO RegistryResponse
fetchMetadataForm NpmClientConfig
config MetadataForm
Full Validators
noValidators PackageName
name
        spanMetadataDecode tracing name $
            pure ((>>= enforceTarballSchemeDetails (npmBaseUrl config)) <$> projectNpmVersion (npmLimits config) name version (responseBody response))

{- | Project a fetched packument's bytes into __one version's__ 'PackageDetails' (or the
typed 'MetadataError'), without decoding the other versions. Pure and total.

The outcome is the same the whole-document path would reach for that one version, computed
selectively: 'Ecluse.Core.Registry.Npm.SelectiveDecode.selectVersionFromPackument' walks the
token stream -- depth-bounding every value (the 'maxNestingDepth' ceiling
'projectNpmManifest' applies through 'checkNestingDepth') and reporting malformed JSON as
'MetadataUndecodable' -- and materialises only the document @name@, the requested version's
object, and its @time@ entry. Those are then validated and projected exactly as
'projectNpmManifest' would:

  * the self-reported @name@ is validated against the request -- an absent\/undecodable name
    is 'MetadataUndecodable', a self-reported /different/ name is 'MetadataNameMismatch' (the
    anti-shadowing distinction);
  * the @versions@ count is bounded against 'maxVersionCount' (the raw entry count -- a
    fail-closed defence-in-depth backstop on this path, which evaluates only the one version
    regardless, so it never needs the projected count the full path bounds);
  * the requested version's object is projected through
    'Ecluse.Core.Registry.Npm.Project.projectVersionEntry' -- the same per-version projection
    the full path runs -- so a present version yields a 'PackageDetails' identical to
    @'Data.Map.Strict.lookup'@-ing it out of a full 'projectNpmManifest', and an
    absent\/unprojectable version yields 'Nothing'.
-}
projectNpmVersion :: Limits -> PackageName -> Version -> ByteString -> Either MetadataError (Maybe PackageDetails)
projectNpmVersion :: Limits
-> PackageName
-> Version
-> ByteString
-> Either MetadataError (Maybe PackageDetails)
projectNpmVersion Limits
limits PackageName
name Version
version ByteString
body = do
    selected <- (SelectiveError -> MetadataError)
-> Either SelectiveError SelectedVersion
-> Either MetadataError SelectedVersion
forall a b c. (a -> b) -> Either a c -> Either b c
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first (Limits -> SelectiveError -> MetadataError
selectiveError Limits
limits) (Int
-> Version -> ByteString -> Either SelectiveError SelectedVersion
selectVersionFromPackument (Limits -> Int
maxNestingDepth Limits
limits) Version
version ByteString
body)
    -- The self-reported name is the validation authority (anti-shadowing), checked before
    -- the version-count backstop -- the same order 'projectNpmManifest' validates the name
    -- before bounding the count.
    reported <- validateReportedName (svName selected)
    when (reported /= name) (Left (MetadataNameMismatch (renderPackageName reported)))
    when
        (svVersionCount selected > maxVersionCount limits)
        (Left (MetadataBoundExceeded (TooManyVersions (svVersionCount selected) (maxVersionCount limits))))
    publishedAt <- parsePublishTime (svTime selected)
    -- 'mkVersion' over the requested version's rendered key matches the whole-document path,
    -- which keys 'projectVersions' by that same string and so projects the version under it.
    pure (svVersion selected >>= projectVersionEntry name (mkVersion Npm (renderVersion version)) publishedAt)

-- The document's self-reported name, validated as the whole-document decode does: an
-- absent name defaults to the empty string and so fails 'projectName' (undecodable), a
-- present non-string fails the @Text@ decode (undecodable), and a well-formed name is
-- the 'PackageName' the request is matched against.
validateReportedName :: Maybe Value -> Either MetadataError PackageName
validateReportedName :: Maybe Value -> Either MetadataError PackageName
validateReportedName = \case
    Maybe Value
Nothing -> MetadataError -> Either MetadataError PackageName
forall a b. a -> Either a b
Left MetadataError
MetadataUndecodable
    Just Value
nameValue -> case (Value -> Parser Text) -> Value -> Maybe Text
forall a b. (a -> Parser b) -> a -> Maybe b
parseMaybe Value -> Parser Text
forall a. FromJSON a => Value -> Parser a
parseJSON Value
nameValue of
        Maybe Text
Nothing -> MetadataError -> Either MetadataError PackageName
forall a b. a -> Either a b
Left MetadataError
MetadataUndecodable
        Just Text
raw -> (ParseError -> MetadataError)
-> Either ParseError PackageName
-> Either MetadataError PackageName
forall a b c. (a -> b) -> Either a c -> Either b c
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first (MetadataError -> ParseError -> MetadataError
forall a b. a -> b -> a
const MetadataError
MetadataUndecodable) (Text -> Either ParseError PackageName
projectName Text
raw)

-- The requested version's publish stamp, folded leniently to match the whole-document
-- path: absent is no stamp ('Nothing'), and a present-but-un-decodable stamp is also
-- 'Nothing' (the version has no known publish time) rather than a document failure:
-- the full path drops a malformed @time@ entry per-entry, so the requested version it
-- would project there carries no time, and the selective projection must agree. (A
-- structurally-malformed-JSON stamp is still a 'SelectiveUndecodable' from the walk, as
-- it is an 'eitherDecodeStrict' failure on the full path.)
parsePublishTime :: Maybe Value -> Either MetadataError (Maybe UTCTime)
parsePublishTime :: Maybe Value -> Either MetadataError (Maybe UTCTime)
parsePublishTime = \case
    Maybe Value
Nothing -> Maybe UTCTime -> Either MetadataError (Maybe UTCTime)
forall a b. b -> Either a b
Right Maybe UTCTime
forall a. Maybe a
Nothing
    Just Value
timeValue -> Maybe UTCTime -> Either MetadataError (Maybe UTCTime)
forall a b. b -> Either a b
Right ((Value -> Parser UTCTime) -> Value -> Maybe UTCTime
forall a b. (a -> Parser b) -> a -> Maybe b
parseMaybe Value -> Parser UTCTime
forall a. FromJSON a => Value -> Parser a
parseJSON Value
timeValue)

-- Map a selective-decode refusal onto the 'MetadataError' the whole-document path raises
-- for the same cause: malformed\/non-object bytes are 'MetadataUndecodable', a depth breach
-- is the 'maxNestingDepth' bound 'checkNestingDepth' reports.
selectiveError :: Limits -> SelectiveError -> MetadataError
selectiveError :: Limits -> SelectiveError -> MetadataError
selectiveError Limits
limits = \case
    SelectiveError
SelectiveUndecodable -> MetadataError
MetadataUndecodable
    SelectiveError
SelectiveTooDeeplyNested -> LimitError -> MetadataError
MetadataBoundExceeded (Int -> LimitError
TooDeeplyNested (Limits -> Int
maxNestingDepth Limits
limits))