-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathStatistics.hs
executable file
·261 lines (230 loc) · 10 KB
/
Statistics.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
#!/usr/bin/env cabal
{- cabal:
build-depends:
base, binary, bytestring, data-default < 0.8, github,
optparse-applicative, text, time, vector
default-language: GHC2021
ghc-options: -Wall -Wno-type-defaults
-}
{-# LANGUAGE ApplicativeDo #-}
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
import Control.Exception
import Control.Monad (when)
import Data.Binary
import Data.ByteString qualified as B
import Data.ByteString.Lazy qualified as BL
import Data.Foldable (forM_)
import Data.List (sort)
import Data.Maybe (isNothing, mapMaybe)
import Data.Text (Text)
import Data.Text qualified as T
import Data.Time.Calendar (Day, diffDays)
import Data.Time.Clock (UTCTime (..), diffUTCTime, getCurrentTime)
import Data.Tuple
import Data.Vector qualified as V
import GitHub qualified as GH
import Options.Applicative
import System.IO
import Prelude hiding (until)
data Config = Config
{ startTime :: Day
, endTime :: Day
, workMode :: WorkMode
}
data WorkMode
= Offline FilePath
| Online (Maybe B.ByteString) (Maybe FilePath)
configParser :: UTCTime -> Parser Config
configParser currTime = do
startTime <-
option auto $
long "since"
<> metavar "DATE"
<> help "The start date of the analysis"
<> showDefault
<> value (read "2021-10-23")
endTime <-
option auto $
long "until"
<> metavar "DATE"
<> help "The end date of the analysis"
<> showDefault
<> value (utctDay currTime)
let
parseUsername =
optional $
strOption $
long "user"
<> metavar "USERNAME"
<> help "GitHub username to bypass anonymous API rate limit"
parseCacheFile =
optional $
strOption $
long "cache"
<> metavar "FILE"
<> help "File to save cache to"
parseOffline =
strOption $
long "offline"
<> metavar "FILE"
<> help "Work offline using previously saved cache"
workMode <- Offline <$> parseOffline <|> Online <$> parseUsername <*> parseCacheFile
pure Config {..}
getPassword :: IO B.ByteString
getPassword = do
hFlush stdout
pass <-
bracket_
(hSetEcho stdin False)
(hSetEcho stdin True)
B.getLine
putChar '\n'
pure pass
getBasicAuth :: B.ByteString -> IO GH.Auth
getBasicAuth username = do
putStr "Launching missiles...\nEnter password to abort: "
password <- getPassword
pure $ GH.BasicAuth username password
getGithubIssues :: WorkMode -> IO (V.Vector GH.Issue)
getGithubIssues (Offline cacheFile) =
decode <$> BL.readFile cacheFile
getGithubIssues (Online mUsername mCacheFile) = do
let githubOrg = "haskell"
githubRepo = "core-libraries-committee"
am <- traverse getBasicAuth mUsername
response <- case am of
Nothing -> GH.github () GH.issuesForRepoR githubOrg githubRepo GH.stateAll GH.FetchAll
Just ba -> GH.github ba GH.issuesForRepoR githubOrg githubRepo GH.stateAll GH.FetchAll
issues <- case response of
Left err -> error $ show err
Right is -> pure is
case mCacheFile of
Nothing -> pure ()
Just cacheFile -> BL.writeFile cacheFile (encode issues)
pure issues
data Issue = Issue
{ issNumber :: !Int
, issTitle :: !Text
, issCreatedAt :: !UTCTime
, issClosedAt :: !(Maybe UTCTime)
, issLabels :: ![Text]
, issComments :: !Int
}
deriving (Eq, Ord, Show)
githubIssueToIssue :: GH.Issue -> Issue
githubIssueToIssue GH.Issue {..} = Issue {..}
where
issNumber = GH.unIssueNumber issueNumber
issTitle = issueTitle
issCreatedAt = issueCreatedAt
issClosedAt = issueClosedAt
issLabels = map (GH.untagName . GH.labelName) $ V.toList issueLabels
issComments = issueComments
computeLifeTimeInDays :: Issue -> Maybe Double
computeLifeTimeInDays Issue {..} = case issClosedAt of
Nothing -> Nothing
Just t -> Just $ realToFrac (diffUTCTime t issCreatedAt) / 86400
computeDaysSinceCreation :: Day -> Issue -> Double
computeDaysSinceCreation currTime Issue {..} =
realToFrac (diffUTCTime (UTCTime currTime 0) issCreatedAt) / 86400
isApproved :: Issue -> Bool
isApproved Issue {..} = "approved" `elem` issLabels
isDeclined :: Issue -> Bool
isDeclined Issue {..} = "declined" `elem` issLabels
isProposal :: Issue -> Bool
isProposal Issue {..} = not ("meta" `elem` issLabels || "core-libraries" `elem` issLabels)
isWithinTimeFrame :: Day -> Day -> Issue -> Bool
isWithinTimeFrame since to Issue {..} =
issCreatedAt >= UTCTime since 0
&& issCreatedAt <= UTCTime to 0
isBase :: Int -> Issue -> Bool
isBase n Issue {..} = ("base-4." <> T.pack (show n)) `elem` issLabels
data Stat = Stat
{ statMinIssue :: Issue
, statMinMetric :: Double
, statMinIssue2 :: Issue
, statMinMetric2 :: Double
, statMed :: Int
, statAvg :: Int
, statMaxIssue2 :: Issue
, statMaxMetric2 :: Double
, statMaxIssue :: Issue
, statMaxMetric :: Double
}
deriving (Show)
collectStat :: (Issue -> Maybe Double) -> [Issue] -> Stat
collectStat _ [] = error "collectStat: no issues found!"
collectStat f is = Stat {..}
where
(statMinIssue, statMinMetric) = getMin is
(statMinIssue2, statMinMetric2) = getMin $ filter (/= statMinIssue) is
statMed = round $ median $ mapMaybe f is
statAvg = round $ average $ mapMaybe f is
(statMaxIssue, statMaxMetric) = getMax is
(statMaxIssue2, statMaxMetric2) = getMax $ filter (/= statMaxIssue) is
getExtremum g = swap . g . mapMaybe (\x -> (,x) <$> f x)
getMin = getExtremum minimum
getMax = getExtremum maximum
median xs = sort xs !! (length xs `quot` 2)
average xs = sum xs / fromIntegral (length xs)
main :: IO ()
main = do
currTime <- getCurrentTime
Config {..} <-
execParser $
info
(configParser currTime <**> helper)
(fullDesc <> header "Collect statistics for CLC proposals")
issues <- getGithubIssues workMode
let proposals =
filter (isWithinTimeFrame startTime endTime) $
filter isProposal . map githubIssueToIssue $
filter (isNothing . GH.issuePullRequest) $
V.toList issues
approvedProposals = filter isApproved proposals
declinedProposals = filter isDeclined proposals
putStrLn $ "Timeframe: since " ++ show startTime ++ " until " ++ show endTime
putStrLn ""
putStrLn $ "Total number of CLC proposals: " ++ show (length proposals)
putStrLn $ "Rate of proposals: " ++ show (round (fromIntegral (length proposals) * 365.25 / 12 / realToFrac (diffDays endTime startTime))) ++ " per month"
putStrLn $ "Approved proposals: " ++ show (length approvedProposals)
putStrLn $ "Declined proposals: " ++ show (length declinedProposals)
putStrLn ""
let allLifeTime = collectStat computeLifeTimeInDays proposals
approvedLifeTime = collectStat computeLifeTimeInDays approvedProposals
putStrLn $ "Median time from creation to decision: " ++ show (statMed allLifeTime) ++ " days"
putStrLn $ "Average time from creation to decision: " ++ show (statAvg allLifeTime) ++ " days"
putStrLn $ "Median time from creation to approval: " ++ show (statMed approvedLifeTime) ++ " days"
putStrLn $ "Average time from creation to approval: " ++ show (statAvg approvedLifeTime) ++ " days"
putStrLn $ "Fastest approval:\n\t" ++ show (round (statMinMetric approvedLifeTime * 24)) ++ " hours for " ++ show (issTitle (statMinIssue approvedLifeTime))
putStrLn $ "2nd fastest approval:\n\t" ++ show (round (statMinMetric2 approvedLifeTime * 24)) ++ " hours for " ++ show (issTitle (statMinIssue2 approvedLifeTime))
putStrLn $ "2nd slowest approval:\n\t" ++ show (round (statMaxMetric2 approvedLifeTime)) ++ " days for " ++ show (issTitle (statMaxIssue2 approvedLifeTime))
putStrLn $ "Slowest approval:\n\t" ++ show (round (statMaxMetric approvedLifeTime)) ++ " days for " ++ show (issTitle (statMaxIssue approvedLifeTime))
putStrLn ""
let allComments = collectStat (Just . fromIntegral . issComments) proposals
approvedComments = collectStat (Just . fromIntegral . issComments) approvedProposals
putStrLn $ "Total activity: " ++ show (sum (map issComments proposals)) ++ " comments"
putStrLn $ "Median activity per proposal: " ++ show (statMed allComments) ++ " comments"
putStrLn $ "Average activity per proposal: " ++ show (statAvg allComments) ++ " comments"
putStrLn $ "Median activity per approved proposal: " ++ show (statMed approvedComments) ++ " comments"
putStrLn $ "Average activity per approved proposal: " ++ show (statAvg approvedComments) ++ " comments"
putStrLn $ "Least active approved proposal:\n\t" ++ show (round (statMinMetric approvedComments)) ++ " comment for " ++ show (issTitle (statMinIssue approvedComments))
putStrLn $ "2nd least active approved proposal:\n\t" ++ show (round (statMinMetric2 approvedComments)) ++ " comments for " ++ show (issTitle (statMinIssue2 approvedComments))
putStrLn $ "2nd most active:\n\t" ++ show (round (statMaxMetric2 allComments)) ++ " comments for " ++ show (issTitle (statMaxIssue2 allComments))
putStrLn $ "Most active:\n\t" ++ show (round (statMaxMetric allComments)) ++ " comments for " ++ show (issTitle (statMaxIssue allComments))
putStrLn ""
forM_ [0 .. 99] $ \n -> do
let releasedInVersion = length (filter (isBase n) approvedProposals)
when (releasedInVersion > 0) $
putStrLn $ "Released in base-4." ++ show n ++ ": " ++ show releasedInVersion
putStrLn ""
let openProposals = filter (isNothing . issClosedAt) proposals
when (not $ null openProposals) $ do
putStrLn $ "Open proposals: " ++ show (length openProposals)
let openLifeTime = collectStat (Just . computeDaysSinceCreation endTime) openProposals
putStrLn $ "Median age for open proposals: " ++ show (statMed openLifeTime) ++ " days"
putStrLn $ "Average age for open proposals: " ++ show (statAvg openLifeTime) ++ " days"
putStrLn $ "Newest open proposal:\n\t" ++ show (round (statMinMetric openLifeTime)) ++ " days for " ++ show (issTitle (statMinIssue openLifeTime))
putStrLn $ "Oldest open proposal:\n\t" ++ show (round (statMaxMetric openLifeTime)) ++ " days for " ++ show (issTitle (statMaxIssue openLifeTime))