Skip to content

Commit

Permalink
Fix the reported modification time of reports.
Browse files Browse the repository at this point in the history
The `/report/list` endpoint uses the `git_ls` function to enumerate
reports, and supposedly includes the modification time is the response.

Unfortunately the modification time returned by `git_ls` is pretty
useless, and actually refers to the modification times in the local
worktree, which has nothing to do with commit times.

The proper way to do this is to identify the last commit to have touched
the file, and then get the time of that commit. Unfortunately gert does
not have an easy way to list commits on just one file (and in fact
neither does libgit2), so we fallback to using the `git rev-list`
command.

Additionally I have re-written the code that was used to compare reports
against the default branch to use Git plumbing commands instead of the
high-level porcelain ones.
  • Loading branch information
plietar committed Jan 10, 2025
1 parent f07ad38 commit 068cbe1
Show file tree
Hide file tree
Showing 10 changed files with 208 additions and 69 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"),
Description: Small HTTP server for running orderly reports.
License: MIT + file LICENSE
Encoding: UTF-8
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Roxygen: list(markdown = TRUE, roclets = c("rd", "namespace", "porcelain::porcelain_roclet"))
URL: https://github.com/mrc-ide/orderly.runner
BugReports: https://github.com/mrc-ide/orderly.runner/issues
Expand Down
24 changes: 6 additions & 18 deletions R/api.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,25 +51,13 @@ root <- function() {
##' query ref :: string
##' state root :: root
report_list <- function(root, ref) {
contents <- gert::git_ls(root, ref = ref)
re <- "^src/([^/]+)/(\\1|orderly)\\.R$"
nms <- sub(re, "\\1",
grep(re, contents$path, value = TRUE, perl = TRUE),
perl = TRUE
)
last_changed <- function(nm) {
max(contents$modified[startsWith(contents$path, sprintf("src/%s", nm))])
}
updatedTime <- vnapply(nms, last_changed, USE.NAMES = FALSE)
modified_sources <- git_get_modified(ref, relative_dir = "src/", repo = root)
modified_reports <- unique(first_dirname(modified_sources))
hasModifications <- vlapply(nms, function(report_name) {
report_name %in% modified_reports
}, USE.NAMES = FALSE)
base <- git_get_default_branch(root)
reports <- get_reports(root = root, ref = ref, base = base)

data.frame(
name = nms,
updatedTime = updatedTime,
hasModifications = hasModifications
name = reports$name,
updatedTime = as.numeric(reports$updated_at),
hasModifications = reports$has_changes
)
}

Expand Down
46 changes: 27 additions & 19 deletions R/git.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
git_run <- function(args, repo = NULL, check = FALSE) {
git_run <- function(args, repo = NULL) {
git <- sys_which("git")
if (!is.null(repo)) {
args <- c("-C", repo, args)
}
res <- system3(git, args)
if (check && !res$success) {
if (!res$success) {
stop(sprintf("Error code %d running command:\n%s",
res$code, paste0(" > ", res$output, collapse = "\n")))
}
Expand All @@ -21,21 +21,29 @@ git_get_default_branch <- function(repo = NULL) {
}


git_get_modified <- function(ref, base = NULL,
relative_dir = NULL, repo = NULL) {
if (is.null(base)) {
base <- git_get_default_branch(repo)
}
if (is.null(relative_dir)) {
relative <- ""
additional_args <- ""
} else {
relative <- sprintf("--relative=%s", relative_dir)
additional_args <- sprintf("-- %s", relative_dir)
}
git_run(
c("diff", "--name-only", relative,
sprintf("%s...%s", base, gert::git_commit_id(ref, repo = repo)),
additional_args),
repo = repo, check = TRUE)$output
#' Get the last commit to have modified the given path.
#'
#' If the path is a directory, any modification to files contained within
#' it are considered.
git_get_latest_commit <- function(repo, ref, path) {
# libgit2 (and thus gert) doesn't really have an interface for this.
# See https://github.com/libgit2/libgit2/issues/495
git_run(c("rev-list", "--max-count=1", ref, "--", path),
repo = repo)$output
}


#' Get the difference between two tree-ish
#'
#' @return a dataframe for each differing entry in the trees, with columns
#' `mode1`, `mode2`, `hash1`, `hash2`, `status`, `score`, `src` and `dst`.
git_diff_tree <- function(left, right, repo = NULL) {
output <- git_run(c("diff-tree", left, right), repo = repo)$output

# See https://git-scm.com/docs/git-diff-tree#_raw_output_format for a
# description of the format.
re <- paste0(
"^:(?<mode1>\\d+) (?<mode2>\\d+) (?<hash1>[0-9a-f]+) (?<hash2>[0-9a-f]+)",
" (?<status>[A-Z])(?<score>\\d+)?\\t(?<src>[^\\t]*)(?:\\t(?<dst>[^\\t]*))?$")
as.data.frame(stringr::str_match(output, re)[, -1, drop = FALSE])
}
34 changes: 33 additions & 1 deletion R/reports.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,40 @@
extract_report_names <- function(paths) {
re <- "^src/([^/]+)/(\\1|orderly)\\.R$"
sub(re, "\\1", grep(re, paths, value = TRUE, perl = TRUE))
}

get_reports <- function(ref, root, base = NULL) {
contents <- gert::git_ls(repo = root, ref = ref)
names <- extract_report_names(contents$path)

paths <- paste0("src/", names)
times <- as.POSIXct(vnapply(paths, function(p) {
commit <- git_get_latest_commit(root, ref, p)
gert::git_commit_info(commit, repo = root)$time
}))

result <- data.frame(
name = names,
updated_at = times,
row.names = NULL
)

if (!is.null(base)) {
diff <- git_diff_tree(paste0(base, ":src"),
paste0(ref, ":src"),
repo = root)
result$has_changes <- names %in% diff$src
}

result
}


get_report_parameters <- function(name, ref, root) {
path <- get_orderly_script_path(name, ref, root)
sha <- gert::git_commit_id(ref, repo = root)
contents <- git_run(
c("show", sprintf("%s:%s", sha, path)), repo = root, check = TRUE
c("show", sprintf("%s:%s", sha, path)), repo = root
)$output
exprs <- parse(text = contents)
orderly2::orderly_parse_expr(exprs, filename = basename(path))$parameters
Expand Down
15 changes: 15 additions & 0 deletions man/git_diff_tree.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions man/git_get_latest_commit.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/helper-orderly-runner.R
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ git_add_and_commit <- function(path, add = ".") {


create_new_commit <- function(path, new_file = "new", add = ".") {
writeLines("new file", file.path(path, new_file))
writeLines(ids::random_id(), file.path(path, new_file))
git_add_and_commit(path, add)
}

Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-api.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ test_that("can get parameters for a report", {
test_that("can run orderly reports", {
skip_if_no_redis()

queue_id <- "orderly.runner:cute-animal"
queue_id <- orderly_queue_id()
repo <- test_prepare_orderly_example(c("data", "parameters"))
gert::git_init(repo)
orderly2::orderly_gitignore_update("(root)", root = repo)
Expand Down Expand Up @@ -138,7 +138,7 @@ test_that("can run orderly reports", {
test_that("can get statuses of jobs", {
# run 2 jobs first and wait for finish
skip_if_no_redis()
queue_id <- "orderly.runner:bad-animal"
queue_id <- orderly_queue_id()
repo <- test_prepare_orderly_example(c("data", "parameters"))
gert::git_init(repo)
orderly2::orderly_gitignore_update("(root)", root = repo)
Expand Down
83 changes: 56 additions & 27 deletions tests/testthat/test-git.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
test_that("handle failure", {
testthat::skip_on_cran()
repo <- initialise_git_repo()
r <- git_run("unknown-command", repo = repo$path)
expect_false(r$success)
expect_error(
git_run("unknown-command", repo = repo$path, check = TRUE),
r$output, fixed = TRUE)
git_run("unknown-command", repo = repo$path),
"'unknown-command' is not a git command", fixed = TRUE)
})


Expand All @@ -21,27 +19,58 @@ test_that("can get default branch when remote origin is set", {
})


test_that("can get files which have been modified", {
testthat::skip_on_cran()
repo <- test_prepare_orderly_remote_example("data")
copy_examples("parameters", repo$local)
git_add_and_commit(repo$local)

log <- gert::git_log(repo = repo$local)
expect_equal(git_get_modified(log$commit[[2]], repo = repo$local),
character(0))
expect_equal(git_get_modified(log$commit[[1]], repo = repo$local),
"src/parameters/parameters.R")
expect_equal(git_get_modified(log$commit[[1]], relative = "src/",
repo = repo$local),
"parameters/parameters.R")
expect_equal(git_get_modified(log$commit[[1]], base = log$commit[[2]],
repo = repo$local),
"src/parameters/parameters.R")
expect_equal(git_get_modified(log$commit[[2]], base = log$commit[[1]],
repo = repo$local),
character(0))
expect_equal(git_get_modified(log$commit[[2]], base = log$commit[[2]],
repo = repo$local),
character(0))
test_that("can get last commit for a path", {
repo <- initialise_git_repo()
c1 <- create_new_commit(repo$path, "hello.txt")
c2 <- create_new_commit(repo$path, "world.txt")
c3 <- create_new_commit(repo$path, "hello.txt")
c4 <- create_new_commit(repo$path, "world.txt")

expect_equal(git_get_latest_commit(repo$path, "HEAD", "hello.txt"), c3)
expect_equal(git_get_latest_commit(repo$path, "HEAD", "world.txt"), c4)

expect_equal(git_get_latest_commit(repo$path, c2, "hello.txt"), c1)
expect_equal(git_get_latest_commit(repo$path, c2, "world.txt"), c2)
})


test_that("can diff trees", {
repo <- test_prepare_orderly_remote_example("data")
copy_examples("parameters", repo$local)
git_add_and_commit(repo$local)

result <- git_diff_tree("HEAD^:src", "HEAD:src", repo = repo$local)
expect_equal(nrow(result), 1)

expect_equal(result$mode1, "000000")
expect_equal(result$mode2, "040000")
expect_match(result$hash1, "[0-9a-f]{20}")
expect_match(result$hash2, "[0-9a-f]{20}")
expect_equal(result$status, "A")
expect_equal(result$src, "parameters")

create_new_commit(repo$local, "src/parameters/hello.txt")

result <- git_diff_tree("HEAD^:src", "HEAD:src", repo = repo$local)
expect_equal(nrow(result), 1)

expect_equal(result$mode1, "040000")
expect_equal(result$mode2, "040000")
expect_match(result$hash1, "[0-9a-f]{20}")
expect_match(result$hash2, "[0-9a-f]{20}")
expect_equal(result$status, "M")
expect_equal(result$src, "parameters")

fs::file_move(file.path(repo$local, "src", "parameters"),
file.path(repo$local, "src", "zparameters"))
git_add_and_commit(repo$local)

# diff-tree never detects renames or copies. They are instead represented as
# a add and delete.
result <- git_diff_tree("HEAD^:src", "HEAD:src", repo = repo$local)
expect_equal(nrow(result), 2)
expect_equal(result$status[[1]], "D")
expect_equal(result$src[[1]], "parameters")
expect_equal(result$status[[2]], "A")
expect_equal(result$src[[2]], "zparameters")
})
55 changes: 55 additions & 0 deletions tests/testthat/test-reports.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,58 @@
test_that("can get list of reports", {
root <- test_prepare_orderly_example(c("data", "parameters"))
helper_add_git(root)

reports <- get_reports(root = root, ref = "HEAD")
expect_setequal(reports$name, c("data", "parameters"))
})


test_that("report list includes last modification time", {
root <- test_prepare_orderly_example(c("data", "parameters"))
helper_add_git(root)

writeLines("Hello", file.path(root, "src/data/hello.txt"))
writeLines("World", file.path(root, "src/parameters/world.txt"))

t1 <- as.POSIXct("2000-01-01T00:00:00")
t2 <- as.POSIXct("2010-01-01T00:00:00")

gert::git_add("src/data/hello.txt", repo = root)
gert::git_commit(
"hello",
author = gert::git_signature("author", "email", t1),
repo = root
)

gert::git_add("src/parameters/world.txt", repo = root)
gert::git_commit(
"world",
author = gert::git_signature("author", "email", t2),
repo = root
)

reports <- get_reports(root = root, ref = "HEAD")
expect_setequal(reports$name, c("data", "parameters"))
expect_equal(reports[reports$name == "data",]$updated_at, t1)
expect_equal(reports[reports$name == "parameters",]$updated_at, t2)
})


test_that("report list includes modification status", {
root <- test_prepare_orderly_example(c("data", "parameters"))
helper_add_git(root)

writeLines("Hello", file.path(root, "src/data/hello.txt"))
git_add_and_commit(root)

reports <- get_reports(root = root, ref = "HEAD", base = "HEAD^")
expect_setequal(reports$name, c("data", "parameters"))
expect_true(reports[reports$name == "data",]$has_changes)
expect_true(!reports[reports$name == "parameters",]$has_changes)

})


test_that("can get orderly script name", {
root <- test_prepare_orderly_example(c("data", "parameters"))
git_info <- helper_add_git(root)
Expand Down

0 comments on commit 068cbe1

Please sign in to comment.