{- | The PEP 440 grammar and ordering (PyPI).

Parses a PEP 440 version into a 'Pep440Key' -- the canonical ordering tuple
@(epoch, release, pre, post, dev, local)@ -- canonicalising non-normalised
spellings (@1.0ALPHA1@, @1.0-1@, trailing zeros, …) along the way. Release has
trailing zeros stripped (@1.0 == 1.0.0@), and the rank tuples encode PEP 440's
None-handling so 'Ord' on 'Pep440Key' reproduces the spec ordering directly:

* @p440Pre@ is @(band, stage, n)@ where @band@ is __0__ for a dev release with
  no prerelease and no post (it sorts /before/ all prereleases, e.g.
  @1.0.dev1 < 1.0a1@), __1__ for an actual prerelease (with @stage@ a\/b\/rc and
  its number), and __2__ for a final or post release (sorts after prereleases).
* @p440Post@ is @(0,0)@ when absent, so a final sorts below any post-release.
* @p440Dev@ is @(0,n)@ when present and @(1,0)@ when absent, so a dev release
  sorts below its non-dev sibling.

A PEP 440 version is __stable__ iff it is neither a pre-release (@a@\/@b@\/@rc@)
nor a dev release; post-releases stay stable.
-}
module Ecluse.Core.Version.Pep440 (
    Pep440Key (..),
    parsePep440,
    parsePep440Suffix,
    consumePre,
    consumePost,
    consumeDev,
    dropSep,
    isPep440Stable,
) where

import Data.Char (isDigit)
import Data.List (dropWhileEnd, unsnoc)
import Data.Text qualified as T

import Ecluse.Core.Version.Token (VToken (..), isAsciiAlphaNum, maxVersionLength, numOr0, parseNumSeg)

{- | A parsed PEP 440 version as its canonical ordering key:
@(epoch, release, pre, post, dev, local)@. Release has trailing zeros stripped
(@1.0 == 1.0.0@). The rank tuples encode PEP 440's None-handling:

\* @p440Pre@ is @(band, stage, n)@ where @band@ is __0__ for a dev release with
  no prerelease and no post (it sorts /before/ all prereleases, e.g.
  @1.0.dev1 < 1.0a1@), __1__ for an actual prerelease (with @stage@ a\/b\/rc and
  its number), and __2__ for a final or post release (sorts after prereleases).
\* @p440Post@ is @(0,0)@ when absent, so a final sorts below any post-release.
\* @p440Dev@ is @(0,n)@ when present and @(1,0)@ when absent, so a dev release
  sorts below its non-dev sibling.
-}
data Pep440Key = Pep440Key
    { Pep440Key -> Integer
p440Epoch :: Integer
    , Pep440Key -> [Integer]
p440Release :: [Integer]
    , Pep440Key -> (Int, Int, Integer)
p440Pre :: (Int, Int, Integer)
    , Pep440Key -> (Int, Integer)
p440Post :: (Int, Integer)
    , Pep440Key -> (Int, Integer)
p440Dev :: (Int, Integer)
    , Pep440Key -> [VToken]
p440Local :: [VToken]
    }
    deriving stock (Pep440Key -> Pep440Key -> Bool
(Pep440Key -> Pep440Key -> Bool)
-> (Pep440Key -> Pep440Key -> Bool) -> Eq Pep440Key
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Pep440Key -> Pep440Key -> Bool
== :: Pep440Key -> Pep440Key -> Bool
$c/= :: Pep440Key -> Pep440Key -> Bool
/= :: Pep440Key -> Pep440Key -> Bool
Eq, Eq Pep440Key
Eq Pep440Key =>
(Pep440Key -> Pep440Key -> Ordering)
-> (Pep440Key -> Pep440Key -> Bool)
-> (Pep440Key -> Pep440Key -> Bool)
-> (Pep440Key -> Pep440Key -> Bool)
-> (Pep440Key -> Pep440Key -> Bool)
-> (Pep440Key -> Pep440Key -> Pep440Key)
-> (Pep440Key -> Pep440Key -> Pep440Key)
-> Ord Pep440Key
Pep440Key -> Pep440Key -> Bool
Pep440Key -> Pep440Key -> Ordering
Pep440Key -> Pep440Key -> Pep440Key
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Pep440Key -> Pep440Key -> Ordering
compare :: Pep440Key -> Pep440Key -> Ordering
$c< :: Pep440Key -> Pep440Key -> Bool
< :: Pep440Key -> Pep440Key -> Bool
$c<= :: Pep440Key -> Pep440Key -> Bool
<= :: Pep440Key -> Pep440Key -> Bool
$c> :: Pep440Key -> Pep440Key -> Bool
> :: Pep440Key -> Pep440Key -> Bool
$c>= :: Pep440Key -> Pep440Key -> Bool
>= :: Pep440Key -> Pep440Key -> Bool
$cmax :: Pep440Key -> Pep440Key -> Pep440Key
max :: Pep440Key -> Pep440Key -> Pep440Key
$cmin :: Pep440Key -> Pep440Key -> Pep440Key
min :: Pep440Key -> Pep440Key -> Pep440Key
Ord, Int -> Pep440Key -> ShowS
[Pep440Key] -> ShowS
Pep440Key -> String
(Int -> Pep440Key -> ShowS)
-> (Pep440Key -> String)
-> ([Pep440Key] -> ShowS)
-> Show Pep440Key
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Pep440Key -> ShowS
showsPrec :: Int -> Pep440Key -> ShowS
$cshow :: Pep440Key -> String
show :: Pep440Key -> String
$cshowList :: [Pep440Key] -> ShowS
showList :: [Pep440Key] -> ShowS
Show)

{- | Parse a PEP 440 version, canonicalising non-normalised spellings
(@1.0ALPHA1@, @1.0-1@, trailing zeros, …). Fails if the string is not a valid
PEP 440 version (e.g. no release, or unrecognised trailing text).
-}
parsePep440 :: Text -> Maybe Pep440Key
parsePep440 :: Text -> Maybe Pep440Key
parsePep440 Text
raw = do
    -- Bound the input length before any numeric parsing: a segment is read into an
    -- 'Integer' with 'readMaybe', which is quadratic in the digit count, so an
    -- unbounded run in hostile metadata would be an algorithmic-complexity DoS.
    Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Text -> Int -> Ordering
T.compareLength Text
raw Int
maxVersionLength Ordering -> Ordering -> Bool
forall a. Eq a => a -> a -> Bool
/= Ordering
GT)
    let lowered :: Text
lowered = Text -> Text
T.toLower (Text -> Text
T.strip Text
raw)
        noV :: Text
noV = Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
lowered (Text -> Text -> Maybe Text
T.stripPrefix Text
"v" Text
lowered)
        (Text
mainPart, Text
localRaw) = HasCallStack => Text -> Text -> (Text, Text)
Text -> Text -> (Text, Text)
T.breakOn Text
"+" Text
noV
    Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard ((Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isMainChar Text
mainPart)
    (epoch, afterEpoch) <- Text -> Maybe (Integer, Text)
parseEpoch Text
mainPart
    (release, suffix) <- parseRelease afterEpoch
    suffixParts <- parsePep440Suffix suffix
    localToks <- parseLocal localRaw
    pure (assembleKey epoch release suffixParts localToks)
  where
    isMainChar :: Char -> Bool
isMainChar Char
c = Char -> Bool
isAsciiAlphaNum Char
c Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'.' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'!' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_'

-- Split an optional @epoch!@ prefix off the main part: the epoch (0 when
-- absent) and the remainder.
parseEpoch :: Text -> Maybe (Integer, Text)
parseEpoch :: Text -> Maybe (Integer, Text)
parseEpoch Text
mainPart = do
    let (Text
epochText, Text
afterEpoch) = case HasCallStack => Text -> Text -> (Text, Text)
Text -> Text -> (Text, Text)
T.breakOn Text
"!" Text
mainPart of
            (Text
e, Text
rest)
                | Text -> Bool
T.null Text
rest -> (Text
"", Text
mainPart)
                | Bool
otherwise -> (Text
e, Int -> Text -> Text
T.drop Int
1 Text
rest)
    epoch <- if Text -> Bool
T.null Text
epochText then Integer -> Maybe Integer
forall a. a -> Maybe a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Integer
0 else Text -> Maybe Integer
parseNumSeg Text
epochText
    pure (epoch, afterEpoch)

-- Consume the leading dotted-numeric release: its segments (at least one) and
-- the unconsumed suffix.
parseRelease :: Text -> Maybe ([Integer], Text)
parseRelease :: Text -> Maybe ([Integer], Text)
parseRelease Text
afterEpoch = do
    let (Text
releaseText, Text
suffix) = (Char -> Bool) -> Text -> (Text, Text)
T.span (\Char
c -> Char -> Bool
isDigit Char
c Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'.') Text
afterEpoch
        -- 'releaseText' greedily grabs the dot that separates the release from a
        -- suffix ("1.0.dev1" → "1.0." → ["1","0",""]), so drop *one* trailing
        -- empty segment -- but only one, and reject any remaining empty segment so
        -- interior/leading blanks ("1..0", ".1.0", "1.0..dev1") are not accepted.
        relSegs :: [Text]
relSegs = [Text] -> [Text]
dropTrailingEmpty (HasCallStack => Text -> Text -> [Text]
Text -> Text -> [Text]
T.splitOn Text
"." Text
releaseText)
    Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Bool
not ((Text -> Bool) -> [Text] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any Text -> Bool
T.null [Text]
relSegs))
    release <- (Text -> Maybe Integer) -> [Text] -> Maybe [Integer]
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
forall (f :: * -> *) a b.
Applicative f =>
(a -> f b) -> [a] -> f [b]
traverse Text -> Maybe Integer
parseNumSeg [Text]
relSegs
    guard (not (null release))
    pure (release, suffix)
  where
    -- Drop at most one trailing empty segment (the release/suffix separator dot).
    -- Only the final segment is dropped, so a doubled trailing blank ("1.0..dev1")
    -- leaves an empty segment behind for the 'any T.null' guard above to reject.
    dropTrailingEmpty :: [Text] -> [Text]
dropTrailingEmpty [Text]
segs = case [Text] -> Maybe ([Text], Text)
forall a. [a] -> Maybe ([a], a)
unsnoc [Text]
segs of
        Just ([Text]
initSegs, Text
lastSeg) | Text -> Bool
T.null Text
lastSeg -> [Text]
initSegs
        Maybe ([Text], Text)
_ -> [Text]
segs

-- Parse the local segment (still carrying its leading @+@) into ordering
-- tokens; empty input means no local segment.
parseLocal :: Text -> Maybe [VToken]
parseLocal :: Text -> Maybe [VToken]
parseLocal Text
lr
    | Text -> Bool
T.null Text
lr = [VToken] -> Maybe [VToken]
forall a. a -> Maybe a
Just []
    | Bool
otherwise =
        let segs :: [Text]
segs = (Char -> Bool) -> Text -> [Text]
T.split (Char -> String -> Bool
forall (f :: * -> *) a.
(Foldable f, DisallowElem f, Eq a) =>
a -> f a -> Bool
`elem` [Char
'.', Char
'-', Char
'_']) (Int -> Text -> Text
T.drop Int
1 Text
lr)
         in if (Text -> Bool) -> [Text] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (\Text
s -> Bool -> Bool
not (Text -> Bool
T.null Text
s) Bool -> Bool -> Bool
&& (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isAsciiAlphaNum Text
s) [Text]
segs
                then [VToken] -> Maybe [VToken]
forall a. a -> Maybe a
Just ((Text -> VToken) -> [Text] -> [VToken]
forall a b. (a -> b) -> [a] -> [b]
map Text -> VToken
localTok [Text]
segs)
                else Maybe [VToken]
forall a. Maybe a
Nothing
  where
    localTok :: Text -> VToken
localTok Text
s = if (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isDigit Text
s then Integer -> VToken
VNum (Text -> Integer
numOr0 Text
s) else Text -> VToken
VStr Text
s

-- Assemble the canonical key: strip the release's trailing zeros and band the
-- suffix parts into the rank tuples documented on 'Pep440Key'.
assembleKey ::
    Integer -> [Integer] -> (Maybe (Int, Integer), Maybe Integer, Maybe Integer) -> [VToken] -> Pep440Key
assembleKey :: Integer
-> [Integer]
-> (Maybe (Int, Integer), Maybe Integer, Maybe Integer)
-> [VToken]
-> Pep440Key
assembleKey Integer
epoch [Integer]
release (Maybe (Int, Integer)
mPre, Maybe Integer
mPost, Maybe Integer
mDev) [VToken]
localToks =
    Pep440Key
        { p440Epoch :: Integer
p440Epoch = Integer
epoch
        , p440Release :: [Integer]
p440Release = [Integer] -> [Integer]
stripTrailingZeros [Integer]
release
        , p440Pre :: (Int, Int, Integer)
p440Pre = (Int, Int, Integer)
pre
        , p440Post :: (Int, Integer)
p440Post = (Int, Integer)
post
        , p440Dev :: (Int, Integer)
p440Dev = (Int, Integer)
dev
        , p440Local :: [VToken]
p440Local = [VToken]
localToks
        }
  where
    pre :: (Int, Int, Integer)
pre = case Maybe (Int, Integer)
mPre of
        Just (Int
stage, Integer
n) -> (Int
1, Int
stage, Integer
n)
        Maybe (Int, Integer)
Nothing
            | Maybe Integer -> Bool
forall a. Maybe a -> Bool
isJust Maybe Integer
mDev Bool -> Bool -> Bool
&& Maybe Integer -> Bool
forall a. Maybe a -> Bool
isNothing Maybe Integer
mPost -> (Int
0, Int
0, Integer
0)
            | Bool
otherwise -> (Int
2, Int
0, Integer
0)
    post :: (Int, Integer)
post = case Maybe Integer
mPost of
        Maybe Integer
Nothing -> (Int
0, Integer
0)
        Just Integer
n -> (Int
1, Integer
n)
    dev :: (Int, Integer)
dev = case Maybe Integer
mDev of
        Maybe Integer
Nothing -> (Int
1, Integer
0)
        Just Integer
n -> (Int
0, Integer
n)
    stripTrailingZeros :: [Integer] -> [Integer]
stripTrailingZeros = (Integer -> Bool) -> [Integer] -> [Integer]
forall a. (a -> Bool) -> [a] -> [a]
dropWhileEnd (Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
== Integer
0)

{- | Consume a PEP 440 suffix into its prerelease\/post\/dev parts (each absent
or present), failing if any text is left unconsumed (so trailing garbage is
rejected). The banding into a sort key happens in 'parsePep440'.
-}
parsePep440Suffix ::
    Text -> Maybe (Maybe (Int, Integer), Maybe Integer, Maybe Integer)
parsePep440Suffix :: Text -> Maybe (Maybe (Int, Integer), Maybe Integer, Maybe Integer)
parsePep440Suffix Text
s0 =
    let (Maybe (Int, Integer)
pre, Text
s1) = Text -> (Maybe (Int, Integer), Text)
consumePre Text
s0
        (Maybe Integer
post, Text
s2) = Text -> (Maybe Integer, Text)
consumePost Text
s1
        (Maybe Integer
dev, Text
s3) = Text -> (Maybe Integer, Text)
consumeDev Text
s2
     in if Text -> Bool
T.null Text
s3 then (Maybe (Int, Integer), Maybe Integer, Maybe Integer)
-> Maybe (Maybe (Int, Integer), Maybe Integer, Maybe Integer)
forall a. a -> Maybe a
Just (Maybe (Int, Integer)
pre, Maybe Integer
post, Maybe Integer
dev) else Maybe (Maybe (Int, Integer), Maybe Integer, Maybe Integer)
forall a. Maybe a
Nothing

-- | Drop one optional separator (@.@\/@-@\/@_@) from the front.
dropSep :: Text -> Text
dropSep :: Text -> Text
dropSep Text
s = case Text -> Maybe (Char, Text)
T.uncons Text
s of
    Just (Char
c, Text
rest) | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'.' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_' -> Text
rest
    Maybe (Char, Text)
_ -> Text
s

{- | Consume an optional prerelease label into @Just (stage, n)@ (stage 0\/1\/2
for a\/b\/rc); 'Nothing' if absent.
-}
consumePre :: Text -> (Maybe (Int, Integer), Text)
consumePre :: Text -> (Maybe (Int, Integer), Text)
consumePre Text
s =
    case [Maybe (Int, Text)] -> Maybe (Int, Text)
forall (t :: * -> *) (f :: * -> *) a.
(Foldable t, Alternative f) =>
t (f a) -> f a
asum (((Text, Int) -> Maybe (Int, Text))
-> [(Text, Int)] -> [Maybe (Int, Text)]
forall a b. (a -> b) -> [a] -> [b]
map (\(Text
lbl, Int
rk) -> (,) Int
rk (Text -> (Int, Text)) -> Maybe Text -> Maybe (Int, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Text -> Maybe Text
T.stripPrefix Text
lbl (Text -> Text
dropSep Text
s)) [(Text, Int)]
preLabels) of
        Maybe (Int, Text)
Nothing -> (Maybe (Int, Integer)
forall a. Maybe a
Nothing, Text
s)
        Just (Int
rk, Text
afterLabel) ->
            let (Text
digits, Text
rest) = (Char -> Bool) -> Text -> (Text, Text)
T.span Char -> Bool
isDigit (Text -> Text
dropSep Text
afterLabel)
             in ((Int, Integer) -> Maybe (Int, Integer)
forall a. a -> Maybe a
Just (Int
rk, Text -> Integer
numOr0 Text
digits), Text
rest)
  where
    preLabels :: [(Text, Int)]
preLabels =
        [ (Text
"alpha", Int
0)
        , (Text
"beta", Int
1)
        , (Text
"preview", Int
2)
        , (Text
"pre", Int
2)
        , (Text
"rc", Int
2)
        , (Text
"a", Int
0)
        , (Text
"b", Int
1)
        , (Text
"c", Int
2)
        ]

{- | Consume an optional post-release (@.postN@, @.revN@, or @-N@) into @Just n@;
'Nothing' if absent.
-}
consumePost :: Text -> (Maybe Integer, Text)
consumePost :: Text -> (Maybe Integer, Text)
consumePost Text
s =
    case [Maybe Text] -> Maybe Text
forall (t :: * -> *) (f :: * -> *) a.
(Foldable t, Alternative f) =>
t (f a) -> f a
asum ((Text -> Maybe Text) -> [Text] -> [Maybe Text]
forall a b. (a -> b) -> [a] -> [b]
map (\Text
lbl -> Text -> Text -> Maybe Text
T.stripPrefix Text
lbl (Text -> Text
dropSep Text
s)) [Text
"post", Text
"rev"]) of
        Just Text
afterLabel ->
            let (Text
digits, Text
rest) = (Char -> Bool) -> Text -> (Text, Text)
T.span Char -> Bool
isDigit (Text -> Text
dropSep Text
afterLabel)
             in (Integer -> Maybe Integer
forall a. a -> Maybe a
Just (Text -> Integer
numOr0 Text
digits), Text
rest)
        Maybe Text
Nothing -> case Text -> Text -> Maybe Text
T.stripPrefix Text
"-" Text
s of
            Just Text
afterDash ->
                let (Text
digits, Text
rest) = (Char -> Bool) -> Text -> (Text, Text)
T.span Char -> Bool
isDigit Text
afterDash
                 in if Text -> Bool
T.null Text
digits then (Maybe Integer
forall a. Maybe a
Nothing, Text
s) else (Integer -> Maybe Integer
forall a. a -> Maybe a
Just (Text -> Integer
numOr0 Text
digits), Text
rest)
            Maybe Text
Nothing -> (Maybe Integer
forall a. Maybe a
Nothing, Text
s)

-- | Consume an optional dev-release (@.devN@) into @Just n@; 'Nothing' if absent.
consumeDev :: Text -> (Maybe Integer, Text)
consumeDev :: Text -> (Maybe Integer, Text)
consumeDev Text
s =
    case Text -> Text -> Maybe Text
T.stripPrefix Text
"dev" (Text -> Text
dropSep Text
s) of
        Just Text
afterLabel ->
            let (Text
digits, Text
rest) = (Char -> Bool) -> Text -> (Text, Text)
T.span Char -> Bool
isDigit (Text -> Text
dropSep Text
afterLabel)
             in (Integer -> Maybe Integer
forall a. a -> Maybe a
Just (Text -> Integer
numOr0 Text
digits), Text
rest)
        Maybe Text
Nothing -> (Maybe Integer
forall a. Maybe a
Nothing, Text
s)

{- | Whether a PEP 440 version is stable: neither a pre-release (@a@\/@b@\/@rc@)
nor a dev release. Post-releases /are/ stable. So @1.0@ and @1.0.post1@ are
stable; @1.0a1@, @1.0rc1@, @1.0.dev1@ and @1.0a1.dev2@ are not.
-}
isPep440Stable :: Pep440Key -> Bool
isPep440Stable :: Pep440Key -> Bool
isPep440Stable Pep440Key
k = Pep440Key -> Bool
noPre Pep440Key
k Bool -> Bool -> Bool
&& Pep440Key -> Bool
noDev Pep440Key
k
  where
    -- Final/post: no prerelease band (1) and no dev band (0). The field
    -- semantics are documented on 'Pep440Key'; post-releases stay stable.
    noPre :: Pep440Key -> Bool
noPre Pep440Key
key = case Pep440Key -> (Int, Int, Integer)
p440Pre Pep440Key
key of (Int
band, Int
_, Integer
_) -> Int
band Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
1
    noDev :: Pep440Key -> Bool
noDev Pep440Key
key = case Pep440Key -> (Int, Integer)
p440Dev Pep440Key
key of (Int
band, Integer
_) -> Int
band Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0