forked from broadinstitute/gatk
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_gatk_stats_app_script.gs
381 lines (299 loc) · 13 KB
/
get_gatk_stats_app_script.gs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
// Update GATK User Stats Spreadsheet
// Author: Jonn Smith
// Date: 2022 Jan 04
//
// Record the number of pulls for our GATK docker container(s).
// Designed to be run periodically.
//
// Inspired by: https://faun.pub/track-how-many-times-docker-image-was-pulled-55cafe67ac97
//
// Script URL: https://script.google.com/home/projects/1UQnN7Y7Ci8bP2QSczLdNerLUKh6vAmh1gCc40xEqiIGiPngfCLYeImbq/edit
const OVERALL_STATS_SHEET_NAME = "Overall Stats";
const DOCKER_STATS_SHEET_NAME = "Docker Repo Pulls";
const GITHUB_STATS_SHEET_NAME = "Github Stats";
const REQUIRED_SHEETS = [OVERALL_STATS_SHEET_NAME, DOCKER_STATS_SHEET_NAME, GITHUB_STATS_SHEET_NAME];
function updateSpredsheet() {
// Set up our variables for this instance of this script:
// NOTE: THIS ORDER MATTERS! APPEND! DO NOT INSERT!
const dockerHubImages = ['broadinstitute/gatk', 'broadinstitute/gatk3'];
const gitHubUrl = "https://api.github.com/repos/broadinstitute/gatk/releases";
// Auth token to pull down the info on the releases so we don't get rate-limited:
// This is OPTIONAL, but HIGHLY RECOMMENDED!
const GITHUB_AUTH_TOKEN = "";
// ============================================
// Make sure the spreadsheet is ready:
setupSpreadsheetIfNecessary(dockerHubImages);
// ============================================
// Update the docker image downloads:
var dockerImageDownloadCountList = recordDockerImagePullCount(dockerHubImages);
// Update our github release downloads:
var githubDownloads = recordGitHubReleaseDownloadCount(gitHubUrl, GITHUB_AUTH_TOKEN);
// ============================================
// Update our summary sheet:
var spreadsheet = SpreadsheetApp.getActive().getSheetByName(OVERALL_STATS_SHEET_NAME);
var downloadCounts = [];
downloadCounts = downloadCounts.concat(dockerImageDownloadCountList);
downloadCounts.push(githubDownloads);
var totalDownloads = 0;
downloadCounts.forEach(function (count) {
totalDownloads += count;
});
// This is the last row with data in it:
var row = [new Date()];
row = row.concat(downloadCounts);
row.push(totalDownloads);
spreadsheet.appendRow(row);
// Now make all the columns auto-fit so we can view them correctly:
REQUIRED_SHEETS.forEach(function (sheetName) {
const sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName(sheetName);
sheet.autoResizeColumns(1, sheet.getLastColumn());
})
// Finally set the active sheet to the overall stats page:
SpreadsheetApp.getActiveSpreadsheet().setActiveSheet(
SpreadsheetApp.getActiveSpreadsheet().getSheetByName(OVERALL_STATS_SHEET_NAME)
);
}
function setupSpreadsheetIfNecessary(dockerHubImages) {
// Set up the sheets / tabs, headers, etc. required by this script.
const spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
// Check to see if we have to set up our sheets:
var mustInitializeSheets = false;
for (let i = 0; i < REQUIRED_SHEETS.length; i++) {
// Create the sheet itself:
if (spreadsheet.getSheetByName(REQUIRED_SHEETS[i]) == null) {
mustInitializeSheets = true;
break;
}
}
if (!mustInitializeSheets) {
console.log("Sheet already set up for data.");
return;
}
else {
console.log("Must initialize sheets!");
}
// Make sure all our required sheets exist:
REQUIRED_SHEETS.forEach( function (sheetName) {
console.log("Adding sheet: " + sheetName);
// Create the sheet itself:
if (spreadsheet.getSheetByName(sheetName) == null) {
const newSheet = spreadsheet.insertSheet(sheetName);
// Set up the top left cell as our date:
const dateRange = newSheet.getRange("A1");
dateRange.setValue("Date / Time");
dateRange.setFontWeight('bold');
}
})
// Set up the headers in the overall stats sheet next:
const overallStatsSheet = spreadsheet.getSheetByName(OVERALL_STATS_SHEET_NAME);
// Start with docker image headers:
var headerNum = 0;
dockerHubImages.forEach(function (dockerImage) {
console.log("Setting header for docker image: " + dockerImage);
var headerCell = overallStatsSheet.getRange(1, 2 + headerNum, 1, 1);
headerCell.setValue(dockerImage + " Docker Pulls");
headerCell.setFontWeight('bold');
headerNum += 1;
})
// Now the github header:
console.log("Setting header for GitHub releases");
var headerCell = overallStatsSheet.getRange(1, 2 + headerNum, 1, 1);
headerCell.setValue("Github Repo Pulls");
headerCell.setFontWeight('bold');
headerNum += 1;
// Now our total downloads:
console.log("Setting header for Total Downloads");
headerCell = overallStatsSheet.getRange(1, 2 + headerNum, 1, 1);
headerCell.setValue("Total Overall Downloads");
headerCell.setFontWeight('bold');
headerNum += 1;
// Clean up the default sheet if it's still there:
const defaultSheet = spreadsheet.getSheetByName("Sheet1");
if (defaultSheet != null) {
console.log("Removing default sheet.");
spreadsheet.deleteSheet(defaultSheet);
}
}
function cleanGithubUrl(url) {
// Convert a regular github url to an API url so that
// we can receive JSON results.
const urlRegex = /http.*?github.com\/(.*)/;
// Make sure the interpreter knows URL should be a string:
url = String(url);
// Set up a default return value:
var newUrl = url;
if (url.startsWith("https://github.com") || url.startsWith("http://github.com") || url.startsWith("https://www.github.com") || url.startsWith("http://www.github.com")) {
const match = url.match(urlRegex);
newUrl = "https://api.github.com/repos/" + match[1]
}
if (!newUrl.endsWith("releases") && !newUrl.endsWith("releases/")) {
if (newUrl.endsWith("/")) {
newUrl = newUrl + "releases"
}
else{
newUrl = newUrl + "/releases"
}
}
return newUrl;
}
function recordGitHubReleaseDownloadCount(githubUrl, authToken) {
// Header for authorization to pull down the info on the releases so we don't get rate-limited:
// This is OPTIONAL, but HIGHLY RECOMMENDED!
var REQUEST_HEADERS = {
"headers" : {
"Authorization" : "token " + authToken
}
};
var NUM_FIELDS_PER_RELEASE = 2;
var COUNT_START_COLUMN = 2;
var numResultsPerPage = 100;
var githubUrl = cleanGithubUrl(githubUrl);
var row = [new Date()];
// Get the spreadsheet with our docker info in it:
var spreadsheet = SpreadsheetApp.getActive().getSheetByName(GITHUB_STATS_SHEET_NAME);
// This is the last row with data in it:
var lastRow = spreadsheet.getLastRow();
// Get info on all our releases.
// We'll need to paginate here, so let's do something very simple
// when we have to get "more" rows:
var releaseInfo = [];
var pageNum = 1;
while (true) {
var response = "";
if (authToken.length > 0) {
response = UrlFetchApp.fetch(githubUrl + "?per_page=" + numResultsPerPage + "&page=" + pageNum, REQUEST_HEADERS);
}
else {
response = UrlFetchApp.fetch(githubUrl + "?per_page=" + numResultsPerPage + "&page=" + pageNum);
}
var githubJsonResponse = JSON.parse(response.getContentText());
console.log("Releases found on page %d: %d", pageNum, githubJsonResponse.length);
// If we've reached the last page, we won't have anything here:
if (githubJsonResponse.length == 0) {
break;
}
releaseInfo = releaseInfo.concat(githubJsonResponse);
pageNum++;
}
console.log("Total number of releases found: %d", releaseInfo.length);
// Sort our releases so we can put them into the spreadsheet
// and we don't have to worry about new releases later:
releaseInfo.sort((a,b) => {
if (a['published_at'] < b['published_at']) {
return -1;
}
else if (a['published_at'] > b['published_at']) {
return 1;
}
else {
return 0;
}
});
// Get the headers of our spreadsheet so we can check to see if our releases are all in there.
// Otherwise we may have to add in a new header for a new release.
var lastCol = spreadsheet.getLastColumn();
var releaseVersions = [];
if ((lastCol != 0) && (lastCol != 1)) {
releaseVersions = spreadsheet.getRange(1,2,1, lastCol-1).getValues()[0];
}
// Populate our new row of data:
var releaseIndex = 0;
var totalDownloads = 0;
releaseInfo.forEach( function (release) {
var tagName = release['tag_name'];
var downloadCount = 0;
if (release['assets'].length != 0) {
var downloadCount = release['assets'][0]['download_count'];
}
// Add our new download count to our row:
row.push(downloadCount);
// Now check if we have this entry already in our headers.
// We do this with the assumption that releases are never
// removed from the repo, so we can use simple counts to track
// when new releases are added:
var oldDownloadCount = 0;
// Need to divide the release versions by 2
// because they comprise all the headers (including the new downloads)
if ((releaseIndex + 1) > (releaseVersions.length/2)) {
// New release!
console.log("New release detected: %s", tagName)
// We need to add this new release to our headers!
console.log("Setting header cell: %d, %d) to track %s total downloads", 1, COUNT_START_COLUMN + (releaseIndex * NUM_FIELDS_PER_RELEASE), tagName)
var newReleaseDownloadCountHeaderCell = spreadsheet.getRange(1, COUNT_START_COLUMN + (releaseIndex * NUM_FIELDS_PER_RELEASE), 1, 1);
newReleaseDownloadCountHeaderCell.setValue(tagName);
newReleaseDownloadCountHeaderCell.setFontWeight('bold');
console.log("Setting header cell: %d, %d) to track %s new downloads", 1, COUNT_START_COLUMN + (releaseIndex * NUM_FIELDS_PER_RELEASE) + 1, tagName)
var newReleaseDownloadDiffCountHeaderCell = spreadsheet.getRange(1, COUNT_START_COLUMN + (releaseIndex * NUM_FIELDS_PER_RELEASE) + 1, 1, 1);
newReleaseDownloadDiffCountHeaderCell.setValue(tagName + " New Downloads");
newReleaseDownloadDiffCountHeaderCell.setFontWeight('bold');
// no old data so use today's downloads
row.push(downloadCount);
}
else {
// We've seen this release before. Let's get some diff numbers:
oldDownloadCount = spreadsheet.getRange(lastRow, COUNT_START_COLUMN + (releaseIndex * NUM_FIELDS_PER_RELEASE)).getValue();
row.push(downloadCount - oldDownloadCount);
}
console.log("Release: %s: %d (delta=%d)",tagName, downloadCount, downloadCount - oldDownloadCount);
releaseIndex++;
totalDownloads += downloadCount;
});
// Update the row for our docker image counts:
spreadsheet.appendRow(row);
return totalDownloads;
}
// ========================================================================
function recordDockerImagePullCount(images) {
var NUM_FIELDS_PER_IMAGE = 2;
var COUNT_START_COLUMN = 2;
var row = [new Date()];
// Get the spreadsheet with our docker info in it:
var spreadsheet = SpreadsheetApp.getActive().getSheetByName(DOCKER_STATS_SHEET_NAME);
// This is the last row with data in it:
var lastRow = spreadsheet.getLastRow();
// Get the headers of our docker spreadsheet so we can update as necessary:
var lastCol = spreadsheet.getLastColumn();
var imageVersions = [];
if (lastCol != 1) {
imageVersions = spreadsheet.getRange(1,2,1, lastCol-1).getValues()[0];
}
var totalPullCountList = [];
var imageNum = 0;
images.forEach(function (image) {
// Get the new count:
var pull_count = get_image_pull_count(image);
row.push(pull_count);
var new_pulls = 0;
// Check if we need to add a new header:
if ((imageNum + 1) > (imageVersions.length/2)) {
// New Docker Image to Track!
console.log("New docker image detected: %s", image)
// We need to add this new release to our headers!
console.log("Setting header cell: %d, %d) to track %s total pulls", 1, COUNT_START_COLUMN + (imageNum * NUM_FIELDS_PER_IMAGE), image)
var newReleaseDownloadCountHeaderCell = spreadsheet.getRange(1, COUNT_START_COLUMN + (imageNum * NUM_FIELDS_PER_IMAGE), 1, 1);
newReleaseDownloadCountHeaderCell.setValue("Pulls (" + image + ")");
newReleaseDownloadCountHeaderCell.setFontWeight('bold');
console.log("Setting header cell: %d, %d) to track %s new pulls", 1, COUNT_START_COLUMN + (imageNum * NUM_FIELDS_PER_IMAGE) + 1, image)
var newReleaseDownloadDiffCountHeaderCell = spreadsheet.getRange(1, COUNT_START_COLUMN + (imageNum * NUM_FIELDS_PER_IMAGE) + 1, 1, 1);
newReleaseDownloadDiffCountHeaderCell.setValue("New Pulls (" + image + ")");
newReleaseDownloadDiffCountHeaderCell.setFontWeight('bold');
}
else {
// Get the old count as well:
var oldPullCount = spreadsheet.getRange(lastRow, COUNT_START_COLUMN + (imageNum * NUM_FIELDS_PER_IMAGE)).getValue();
new_pulls = pull_count - oldPullCount;
}
// Add the count diff to our row:
row.push(new_pulls);
imageNum++;
totalPullCountList.push(pull_count);
});
// Update the row for our docker image counts:
spreadsheet.appendRow(row);
return totalPullCountList;
}
function get_image_pull_count(image) {
var response = UrlFetchApp.fetch("https://hub.docker.com/v2/repositories/" + image);
var imageStats = JSON.parse(response.getContentText());
return imageStats['pull_count'];
}