{- | The structured-logging pipeline.

Écluse sits in the install path of someone else's build, so when it refuses a
package or runs slow the operator must see /why/ from the logs alone. This module
stands up a @katip@ 'LogEnv' -- the single log stream every layer attaches context
to -- and chooses its on-the-wire shape:

* __'JsonLog'__ writes __one compact JSON object per line__ to stdout (JSONL): the
  whole physical line /is/ the JSON, with no pretty-printing and no level or
  timestamp prefix outside the object, and any newline inside a field escaped as
  @\\n@ so a record never spans two lines. This is the in-container default, the
  shape a log collector's stdout JSON autodiscovery consumes directly.
* __'ConsoleLog'__ writes the human-readable bracketed form for local development.

A 'LogEnv' built here carries no colour codes even on a terminal, so a captured
JSON line is always valid JSON. The selected format is parsed from @ECLUSE_LOG_FORMAT@
at the configuration boundary ("Ecluse.Config") and the resulting 'LogEnv' is held
in the composition root ("Ecluse.Env").

Trace-ID correlation rides this stream as the @dd@ object ('ddField'): @service@\/
@env@\/@version@ from the resolved telemetry identity, plus the active span's
@trace_id@\/@span_id@ in the id format Datadog expects ('formatDdTraceId'). The object
is built here but stays free of any OpenTelemetry dependency -- the active span is read
and the ids rendered by "Ecluse.Telemetry.Correlation", which composes 'ddField' into a
log site's payload.

== Secrets

A bearer token is carried as the redacted @Secret@ of "Ecluse.Core.Credential", whose
'Show' renders only a placeholder, so token material cannot reach a log field
through any structured payload or message built from it (see
@docs\/architecture\/observability.md@). This module adds no field that would
defeat that redaction.

The model is described in @docs\/architecture\/observability.md@ → "Logs".
-}
module Ecluse.Log (
    -- * Log format
    LogFormat (..),
    parseLogFormat,
    renderLogFormat,

    -- * Pipeline construction
    newLogEnv,
    newScribe,
    formatterFor,

    -- * Structured context
    auditContext,
    moduleField,

    -- * Datadog trace correlation
    DdContext (..),
    DdSpan (..),
    ddField,
    ddObject,
    formatDdTraceId,
    formatDdSpanId,

    -- * Rendering (for serialise-and-assert)
    renderLogLine,
) where

import Data.Aeson (Value, object, (.=))
import Data.ByteString qualified as BS
import Data.Text.Lazy qualified as TL
import Data.Text.Lazy.Builder qualified as TB
import Katip (
    ColorStrategy (ColorLog),
    Environment,
    Item,
    LogEnv,
    LogItem,
    Namespace (Namespace),
    Scribe,
    Severity (DebugS),
    SimpleLogPayload,
    Verbosity (V2),
    defaultScribeSettings,
    initLogEnv,
    permitItem,
    registerScribe,
    sl,
 )
import Katip.Scribes.Handle (ItemFormatter, bracketFormat, jsonFormat, mkHandleScribeWithFormatter)

import Ecluse.Core.Wire (WireVocab (..), parseWire, renderWire)

{- | The on-the-wire shape of the log stream, selected by configuration. A sum
type rather than a 'Bool' so each case names its intent and a new shape is a new
constructor, not a second flag.
-}
data LogFormat
    = {- | One compact JSON object per line to stdout (JSONL) -- the in-container
      default a log collector's stdout JSON parsing consumes.
      -}
      JsonLog
    | -- | The human-readable bracketed form, for local development.
      ConsoleLog
    deriving stock (LogFormat -> LogFormat -> Bool
(LogFormat -> LogFormat -> Bool)
-> (LogFormat -> LogFormat -> Bool) -> Eq LogFormat
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: LogFormat -> LogFormat -> Bool
== :: LogFormat -> LogFormat -> Bool
$c/= :: LogFormat -> LogFormat -> Bool
/= :: LogFormat -> LogFormat -> Bool
Eq, Int -> LogFormat -> ShowS
[LogFormat] -> ShowS
LogFormat -> String
(Int -> LogFormat -> ShowS)
-> (LogFormat -> String)
-> ([LogFormat] -> ShowS)
-> Show LogFormat
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> LogFormat -> ShowS
showsPrec :: Int -> LogFormat -> ShowS
$cshow :: LogFormat -> String
show :: LogFormat -> String
$cshowList :: [LogFormat] -> ShowS
showList :: [LogFormat] -> ShowS
Show)

-- The wire vocabulary of a 'LogFormat': the single source both 'parseWire' and
-- 'renderWire' derive from for this type.
instance WireVocab LogFormat where
    wireKind :: Text
wireKind = Text
"log format"
    wireTable :: NonEmpty (LogFormat, Text)
wireTable =
        (LogFormat
JsonLog, Text
"json")
            (LogFormat, Text)
-> [(LogFormat, Text)] -> NonEmpty (LogFormat, Text)
forall a. a -> [a] -> NonEmpty a
:| [(LogFormat
ConsoleLog, Text
"console")]

{- | Parse a 'LogFormat' from its wire name, naming the accepted set on failure.
The same strict, fail-loud style as the other configuration enums
("Ecluse.Config").

>>> parseLogFormat "json"
Right JsonLog

>>> parseLogFormat "yaml"
Left "unknown log format \"yaml\" (expected one of: json, console)"
-}
parseLogFormat :: Text -> Either Text LogFormat
parseLogFormat :: Text -> Either Text LogFormat
parseLogFormat = Text -> Either Text LogFormat
forall a. WireVocab a => Text -> Either Text a
parseWire

-- | The wire name of a 'LogFormat' (the inverse of 'parseLogFormat').
renderLogFormat :: LogFormat -> Text
renderLogFormat :: LogFormat -> Text
renderLogFormat = LogFormat -> Text
forall a. (Eq a, WireVocab a) => a -> Text
renderWire

{- | Build the application 'LogEnv': a @katip@ environment under the @ecluse@
namespace with a single stdout scribe in the chosen 'LogFormat'. This is the value
the composition root holds and every later layer logs through.

The scribe admits every severity ('DebugS' upward); a deployment narrows what it
keeps through @katip@'s own verbosity controls rather than by rebuilding the
environment.
-}
newLogEnv :: LogFormat -> Environment -> IO LogEnv
newLogEnv :: LogFormat -> Environment -> IO LogEnv
newLogEnv LogFormat
format Environment
environment = do
    scribe <- LogFormat -> IO Scribe
newScribe LogFormat
format
    base <- initLogEnv (Namespace ["ecluse"]) environment
    registerScribe "stdout" scribe defaultScribeSettings base

{- | Build the stdout 'Scribe' for a 'LogFormat'. Colour is forced __off__
('ColorLog' 'False') so a captured 'JsonLog' line is always valid JSON -- no ANSI
escapes leak into the object even when stdout is a terminal. The handle scribe
writes each item as exactly one line (the formatter output plus a single trailing
newline), which is what makes 'JsonLog' a true JSONL stream.
-}
newScribe :: LogFormat -> IO Scribe
newScribe :: LogFormat -> IO Scribe
newScribe LogFormat
format =
    (forall a. LogItem a => ItemFormatter a)
-> ColorStrategy -> Handle -> PermitFunc -> Verbosity -> IO Scribe
mkHandleScribeWithFormatter
        (LogFormat -> ItemFormatter a
forall a. LogItem a => LogFormat -> ItemFormatter a
formatterFor LogFormat
format)
        (Bool -> ColorStrategy
ColorLog Bool
False)
        Handle
stdout
        (Severity -> Item a -> IO Bool
forall (m :: * -> *) a. Monad m => Severity -> Item a -> m Bool
permitItem Severity
DebugS)
        Verbosity
V2

{- | The @katip@ 'ItemFormatter' a 'LogFormat' wires into its scribe: the compact
one-line JSON encoder for 'JsonLog', the bracketed human form for 'ConsoleLog'.

Exposed so a test can render an item through the exact formatter the scribe uses,
asserting on the serialised line without writing to stdout (see 'renderLogLine').
-}
formatterFor :: (LogItem a) => LogFormat -> ItemFormatter a
formatterFor :: forall a. LogItem a => LogFormat -> ItemFormatter a
formatterFor = \case
    LogFormat
JsonLog -> ItemFormatter a
forall a. LogItem a => ItemFormatter a
jsonFormat
    LogFormat
ConsoleLog -> ItemFormatter a
forall a. LogItem a => ItemFormatter a
bracketFormat

{- | The structured context for an audit event -- a denial or other rule decision --
carrying the @package@, @version@, and @rule@ the operator needs to explain a 403
from the log line alone. These are the high-cardinality identifiers that belong on
the log line, never on a metric label (see
@docs\/architecture\/observability.md@ → "Cardinality and attributes").

Attach it to a log call as the structured payload; @katip@ renders the three keys
into the line's @data@ object.
-}
auditContext ::
    -- | The package the decision concerns.
    Text ->
    -- | The package version.
    Text ->
    -- | The name of the rule that decided.
    Text ->
    SimpleLogPayload
auditContext :: Text -> Text -> Text -> SimpleLogPayload
auditContext Text
package Text
version Text
rule =
    Text -> Text -> SimpleLogPayload
forall a. ToJSON a => Text -> a -> SimpleLogPayload
sl Text
"package" Text
package SimpleLogPayload -> SimpleLogPayload -> SimpleLogPayload
forall a. Semigroup a => a -> a -> a
<> Text -> Text -> SimpleLogPayload
forall a. ToJSON a => Text -> a -> SimpleLogPayload
sl Text
"version" Text
version SimpleLogPayload -> SimpleLogPayload -> SimpleLogPayload
forall a. Semigroup a => a -> a -> a
<> Text -> Text -> SimpleLogPayload
forall a. ToJSON a => Text -> a -> SimpleLogPayload
sl Text
"rule" Text
rule

{- | The structured context naming the __source module__ a log line was emitted
from, so every JSON record carries a @module@ field (e.g.
@"module":"Ecluse.Server.Pipeline"@). Compose it into a log site's payload alongside
the event's own fields, so the stream can be filtered by emitter without leaning on
the @katip@ namespace. @katip@ renders the key into the line's @data@ object. This is
the standard tag for a log raised off the 'Handler' reader (a plain-'IO' path that
opens its own context through the composition-root 'LogEnv').
-}
moduleField :: Text -> SimpleLogPayload
moduleField :: Text -> SimpleLogPayload
moduleField = Text -> Text -> SimpleLogPayload
forall a. ToJSON a => Text -> a -> SimpleLogPayload
sl Text
"module"

{- | The unified-service identity stamped onto every log line as the @dd@ object, plus
the active span's ids when one is in scope. @service@\/@env@\/@version@ come from the
same resolved telemetry identity as the traces ("Ecluse.Telemetry.Resolve"), so logs
and traces share one identity; the trace\/span ids are present only when a span is
active (filled by "Ecluse.Telemetry.Correlation" off the OpenTelemetry context).
-}
data DdContext = DdContext
    { DdContext -> Text
ddService :: Text
    -- ^ @dd.service@ -- the resolved service name.
    , DdContext -> Maybe Text
ddEnv :: Maybe Text
    -- ^ @dd.env@ -- the deployment environment, when configured.
    , DdContext -> Maybe Text
ddVersion :: Maybe Text
    -- ^ @dd.version@ -- the service version, when configured.
    , DdContext -> Maybe DdSpan
ddSpan :: Maybe DdSpan
    -- ^ The active span's correlation ids, when a span is in scope.
    }
    deriving stock (DdContext -> DdContext -> Bool
(DdContext -> DdContext -> Bool)
-> (DdContext -> DdContext -> Bool) -> Eq DdContext
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: DdContext -> DdContext -> Bool
== :: DdContext -> DdContext -> Bool
$c/= :: DdContext -> DdContext -> Bool
/= :: DdContext -> DdContext -> Bool
Eq, Int -> DdContext -> ShowS
[DdContext] -> ShowS
DdContext -> String
(Int -> DdContext -> ShowS)
-> (DdContext -> String)
-> ([DdContext] -> ShowS)
-> Show DdContext
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> DdContext -> ShowS
showsPrec :: Int -> DdContext -> ShowS
$cshow :: DdContext -> String
show :: DdContext -> String
$cshowList :: [DdContext] -> ShowS
showList :: [DdContext] -> ShowS
Show)

{- | The active span's ids, __already in the id format Datadog expects__ (see
'formatDdTraceId' \/ 'formatDdSpanId'). Held as rendered 'Text' so this type stays free
of any OpenTelemetry dependency.
-}
data DdSpan = DdSpan
    { DdSpan -> Text
ddTraceId :: Text
    -- ^ @dd.trace_id@ -- the trace id in Datadog form.
    , DdSpan -> Text
ddSpanId :: Text
    -- ^ @dd.span_id@ -- the span id in Datadog form.
    }
    deriving stock (DdSpan -> DdSpan -> Bool
(DdSpan -> DdSpan -> Bool)
-> (DdSpan -> DdSpan -> Bool) -> Eq DdSpan
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: DdSpan -> DdSpan -> Bool
== :: DdSpan -> DdSpan -> Bool
$c/= :: DdSpan -> DdSpan -> Bool
/= :: DdSpan -> DdSpan -> Bool
Eq, Int -> DdSpan -> ShowS
[DdSpan] -> ShowS
DdSpan -> String
(Int -> DdSpan -> ShowS)
-> (DdSpan -> String) -> ([DdSpan] -> ShowS) -> Show DdSpan
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> DdSpan -> ShowS
showsPrec :: Int -> DdSpan -> ShowS
$cshow :: DdSpan -> String
show :: DdSpan -> String
$cshowList :: [DdSpan] -> ShowS
showList :: [DdSpan] -> ShowS
Show)

{- | The @dd@ object as JSON: @service@ always, @env@\/@version@ when configured, and
@trace_id@\/@span_id@ only when a span is active. This is the object a log collector's
unified-service tagging and trace-to-log correlation read.
-}
ddObject :: DdContext -> Value
ddObject :: DdContext -> Value
ddObject DdContext
ctx =
    [Pair] -> Value
object ([Pair] -> Value) -> [Pair] -> Value
forall a b. (a -> b) -> a -> b
$
        [Maybe Pair] -> [Pair]
forall a. [Maybe a] -> [a]
catMaybes
            [ Pair -> Maybe Pair
forall a. a -> Maybe a
Just (Key
"service" Key -> Text -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall e kv v. (KeyValue e kv, ToJSON v) => Key -> v -> kv
.= DdContext -> Text
ddService DdContext
ctx)
            , (Key
"env" Key -> Text -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall e kv v. (KeyValue e kv, ToJSON v) => Key -> v -> kv
.=) (Text -> Pair) -> Maybe Text -> Maybe Pair
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> DdContext -> Maybe Text
ddEnv DdContext
ctx
            , (Key
"version" Key -> Text -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall e kv v. (KeyValue e kv, ToJSON v) => Key -> v -> kv
.=) (Text -> Pair) -> Maybe Text -> Maybe Pair
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> DdContext -> Maybe Text
ddVersion DdContext
ctx
            , (Key
"trace_id" Key -> Text -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall e kv v. (KeyValue e kv, ToJSON v) => Key -> v -> kv
.=) (Text -> Pair) -> (DdSpan -> Text) -> DdSpan -> Pair
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DdSpan -> Text
ddTraceId (DdSpan -> Pair) -> Maybe DdSpan -> Maybe Pair
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> DdContext -> Maybe DdSpan
ddSpan DdContext
ctx
            , (Key
"span_id" Key -> Text -> Pair
forall v. ToJSON v => Key -> v -> Pair
forall e kv v. (KeyValue e kv, ToJSON v) => Key -> v -> kv
.=) (Text -> Pair) -> (DdSpan -> Text) -> DdSpan -> Pair
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DdSpan -> Text
ddSpanId (DdSpan -> Pair) -> Maybe DdSpan -> Maybe Pair
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> DdContext -> Maybe DdSpan
ddSpan DdContext
ctx
            ]

{- | The @dd@ object as a @katip@ structured payload, nested under the @dd@ key. Compose
it into a log site's payload so the rendered JSON line carries
@"dd":{"service":…,"trace_id":…}@ for trace-to-log correlation.
-}
ddField :: DdContext -> SimpleLogPayload
ddField :: DdContext -> SimpleLogPayload
ddField = Text -> Value -> SimpleLogPayload
forall a. ToJSON a => Text -> a -> SimpleLogPayload
sl Text
"dd" (Value -> SimpleLogPayload)
-> (DdContext -> Value) -> DdContext -> SimpleLogPayload
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DdContext -> Value
ddObject

{- | Render a raw 16-byte trace id into the id format Datadog correlates on: the
__unsigned decimal of the low 64 bits__. Datadog's log↔trace correlation matches
@dd.trace_id@ as a decimal 64-bit value (the low half of an OpenTelemetry 128-bit id);
the full-128-bit-hex form is a separate opt-in not used here. Reads the last eight bytes
big-endian, so a shorter id is taken whole and a longer one is truncated to its low 64
bits -- never a partial-byte misread.
-}
formatDdTraceId :: ByteString -> Text
formatDdTraceId :: ByteString -> Text
formatDdTraceId = Word64 -> Text
forall b a. (Show a, IsString b) => a -> b
show (Word64 -> Text) -> (ByteString -> Word64) -> ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Word64
low64Bits

{- | Render a raw 8-byte span id into the Datadog form: the __unsigned decimal__ of the
64-bit id (read big-endian), matching @dd.span_id@.
-}
formatDdSpanId :: ByteString -> Text
formatDdSpanId :: ByteString -> Text
formatDdSpanId = Word64 -> Text
forall b a. (Show a, IsString b) => a -> b
show (Word64 -> Text) -> (ByteString -> Word64) -> ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Word64
low64Bits

-- The unsigned 64-bit value of the last (up to) eight bytes, big-endian. Shared by the
-- trace-id low-64 truncation and the span-id read so both decode identically.
low64Bits :: ByteString -> Word64
low64Bits :: ByteString -> Word64
low64Bits = (Word64 -> Word8 -> Word64) -> Word64 -> ByteString -> Word64
forall a. (a -> Word8 -> a) -> a -> ByteString -> a
BS.foldl' (\Word64
acc Word8
byte -> Word64
acc Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* Word64
256 Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
+ Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
byte) Word64
0 (ByteString -> Word64)
-> (ByteString -> ByteString) -> ByteString -> Word64
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> ByteString -> ByteString
lastBytes Int
8
  where
    lastBytes :: Int -> ByteString -> ByteString
    lastBytes :: Int -> ByteString -> ByteString
lastBytes Int
n ByteString
bytes = Int -> ByteString -> ByteString
BS.drop (Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
0 (ByteString -> Int
BS.length ByteString
bytes Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
n)) ByteString
bytes

{- | Render a single log 'Item' to the exact text the scribe for this 'LogFormat'
writes for it -- the formatter output for one item, without the trailing newline
the handle scribe appends to separate physical lines.

This is what the unit tests assert on: it reproduces the scribe's serialisation
with no stdout dependency, so a 'JsonLog' line can be checked for being a single
compact object with escaped newlines, and a 'ConsoleLog' line for the
human-readable form.
-}
renderLogLine :: (LogItem a) => LogFormat -> Item a -> Text
renderLogLine :: forall a. LogItem a => LogFormat -> Item a -> Text
renderLogLine LogFormat
format Item a
item =
    LazyText -> Text
TL.toStrict (Builder -> LazyText
TB.toLazyText (LogFormat -> ItemFormatter a
forall a. LogItem a => LogFormat -> ItemFormatter a
formatterFor LogFormat
format Bool
False Verbosity
V2 Item a
item))