From 285d89b19078895e71b06e2987c2e373ecbcaeb8 Mon Sep 17 00:00:00 2001
From: Carlos Segarra <carlos@carlossegarra.com>
Date: Thu, 25 Nov 2021 10:50:57 +0000
Subject: [PATCH 01/48] adding sanitiser flags and workflow file

---
 .github/workflows/sanitisers.yml | 194 +++++++++++++++++++++++++++++++
 CMakeLists.txt                   |  13 +++
 tasks/dev.py                     |  19 ++-
 3 files changed, 225 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/sanitisers.yml

diff --git a/.github/workflows/sanitisers.yml b/.github/workflows/sanitisers.yml
new file mode 100644
index 000000000..9b040c16c
--- /dev/null
+++ b/.github/workflows/sanitisers.yml
@@ -0,0 +1,194 @@
+name: Nightly Checks
+
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: [master]
+    types: [opened, synchronize, reopened, ready_for_review]
+
+jobs:
+  address-sanitiser:
+    if: github.event.pull_request.draft == false
+    runs-on: ubuntu-latest
+    env:
+      HOST_TYPE: ci
+      REDIS_QUEUE_HOST: redis
+      REDIS_STATE_HOST: redis
+    container:
+      image: faasm/faabric:0.2.2
+    defaults:
+      run:
+        working-directory: /code/faabric
+    services:
+      redis:
+        image: redis
+    steps:
+      # --- Code update ---
+      - name: "Fetch ref"
+        run: git fetch origin ${GITHUB_REF}:ci-branch
+      - name: "Check out branch"
+        run: git checkout --force ci-branch
+      # --- Set-up ---
+      - name: "Ping redis"
+        run: redis-cli -h redis ping
+      # --- Build with thread sanitising
+      - name: "Build tests with leak sanitising"
+        run: inv dev.sanitise Address
+      # --- Tests ---
+      - name: "Run tests with leak sanitising on"
+        run: ./bin/faabric_tests > /dev/null 2> ./addsan.out
+        working-directory: /build/faabric/static
+      - name: "DELETE ME"
+        run: cat ./addsan.out
+      # --- Compare with expected output
+      - name: "Compare with expected output"
+        run: diff ./addsan.out ./tests/sanitise/expected_addsan.out
+
+  thread-sanitiser:
+    if: github.event.pull_request.draft == false
+    runs-on: ubuntu-latest
+    env:
+      HOST_TYPE: ci
+      REDIS_QUEUE_HOST: redis
+      REDIS_STATE_HOST: redis
+    container:
+      image: faasm/faabric:0.2.2
+    defaults:
+      run:
+        working-directory: /code/faabric
+    services:
+      redis:
+        image: redis
+    steps:
+      # --- Code update ---
+      - name: "Fetch ref"
+        run: git fetch origin ${GITHUB_REF}:ci-branch
+      - name: "Check out branch"
+        run: git checkout --force ci-branch
+      # --- Set-up ---
+      - name: "Ping redis"
+        run: redis-cli -h redis ping
+      # --- Build with thread sanitising
+      - name: "Build tests with thread sanitising"
+        run: inv dev.sanitise Thread
+      # --- Tests ---
+      - name: "Run tests with thread sanitising on and print stderr"
+        run: ./bin/faabric_tests > /dev/null 2> ./threadsan.out
+        working-directory: /build/faabric/static
+      - name: "DELETE ME"
+        run: cat ./threadsan.out
+      # --- Compare with expected output
+      - name: "Compare with expected output"
+        run: diff ./threadsan.out ./tests/sanitise/expected_threadsan.out
+
+  undefined-sanitiser:
+    if: github.event.pull_request.draft == false
+    runs-on: ubuntu-latest
+    env:
+      HOST_TYPE: ci
+      REDIS_QUEUE_HOST: redis
+      REDIS_STATE_HOST: redis
+    container:
+      image: faasm/faabric:0.2.2
+    defaults:
+      run:
+        working-directory: /code/faabric
+    services:
+      redis:
+        image: redis
+    steps:
+      # --- Code update ---
+      - name: "Fetch ref"
+        run: git fetch origin ${GITHUB_REF}:ci-branch
+      - name: "Check out branch"
+        run: git checkout --force ci-branch
+      # --- Set-up ---
+      - name: "Ping redis"
+        run: redis-cli -h redis ping
+      # --- Build with thread sanitising
+      - name: "Build tests with undefined sanitising"
+        run: inv dev.sanitise Undefined
+      # --- Tests ---
+      - name: "Run tests with undefined sanitising on"
+        run: ./bin/faabric_tests > /dev/null 2> ./ubsan.out
+        working-directory: /build/faabric/static
+      - name: "DELETE ME"
+        run: cat ./ubsan.out
+      # --- Compare with expected output
+      - name: "Compare with expected output"
+        run: diff ./ubsan.out ./tests/sanitise/expected_ubsan.out
+
+  leak-sanitiser:
+    if: github.event.pull_request.draft == false
+    runs-on: ubuntu-latest
+    env:
+      HOST_TYPE: ci
+      REDIS_QUEUE_HOST: redis
+      REDIS_STATE_HOST: redis
+    container:
+      image: faasm/faabric:0.2.2
+    defaults:
+      run:
+        working-directory: /code/faabric
+    services:
+      redis:
+        image: redis
+    steps:
+      # --- Code update ---
+      - name: "Fetch ref"
+        run: git fetch origin ${GITHUB_REF}:ci-branch
+      - name: "Check out branch"
+        run: git checkout --force ci-branch
+      # --- Set-up ---
+      - name: "Ping redis"
+        run: redis-cli -h redis ping
+      # --- Build with thread sanitising
+      - name: "Build tests with leak sanitising"
+        run: inv dev.sanitise Leak
+      # --- Tests ---
+      - name: "Run tests with leak sanitising on"
+        run: ./bin/faabric_tests > /dev/null 2> ./leaksan.out
+        working-directory: /build/faabric/static
+      - name: "DELETE ME"
+        run: cat ./leaksan.out
+      # --- Compare with expected output
+      - name: "Compare with expected output"
+        run: diff ./leaksan.out ./tests/sanitise/expected_leaksan.out
+
+  memory-sanitiser:
+    if: github.event.pull_request.draft == false
+    runs-on: ubuntu-latest
+    env:
+      HOST_TYPE: ci
+      REDIS_QUEUE_HOST: redis
+      REDIS_STATE_HOST: redis
+    container:
+      image: faasm/faabric:0.2.2
+    defaults:
+      run:
+        working-directory: /code/faabric
+    services:
+      redis:
+        image: redis
+    steps:
+      # --- Code update ---
+      - name: "Fetch ref"
+        run: git fetch origin ${GITHUB_REF}:ci-branch
+      - name: "Check out branch"
+        run: git checkout --force ci-branch
+      # --- Set-up ---
+      - name: "Ping redis"
+        run: redis-cli -h redis ping
+      # --- Build with thread sanitising
+      - name: "Build tests with leak sanitising"
+        run: inv dev.sanitise Memory
+      # --- Tests ---
+      - name: "Run tests with leak sanitising on"
+        run: ./bin/faabric_tests > /dev/null 2> ./memsan.out
+        working-directory: /build/faabric/static
+      - name: "DELETE ME"
+        run: cat ./memsan.out
+      # --- Compare with expected output
+      - name: "Compare with expected output"
+        run: diff ./memsan.out ./tests/sanitise/expected_memsan.out
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f9ca50d2e..5710565e1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -22,6 +22,19 @@ set(CMAKE_EXE_LINKER_FLAGS "-fuse-ld=lld")
 # Compile comamnds for clang tools
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
+# Set-up use of sanitisers
+if (FAABRIC_USE_SANITISER STREQUAL "Address")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
+elseif (FAABRIC_USE_SANITISER STREQUAL "Thread")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
+elseif (FAABRIC_USE_SANITISER STREQUAL "Undefined")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
+elseif (FAABRIC_USE_SANITISER STREQUAL "Leak")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=leak")
+elseif (FAABRIC_USE_SANITISER STREQUAL "Memory")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=memory")
+endif()
+
 # Global include dir
 set(FAABRIC_INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}/include)
 
diff --git a/tasks/dev.py b/tasks/dev.py
index b202c0e6e..ed2099ba5 100644
--- a/tasks/dev.py
+++ b/tasks/dev.py
@@ -14,7 +14,7 @@
 
 
 @task
-def cmake(ctx, clean=False, shared=False, build="Debug"):
+def cmake(ctx, clean=False, shared=False, build="Debug", sanitise_mode="None"):
     """
     Configures the build
     """
@@ -40,6 +40,7 @@ def cmake(ctx, clean=False, shared=False, build="Debug"):
         "-DBUILD_SHARED_LIBS={}".format("ON" if shared else "OFF"),
         "-DCMAKE_CXX_COMPILER=/usr/bin/clang++-13",
         "-DCMAKE_C_COMPILER=/usr/bin/clang-13",
+        "-DFAABRIC_USE_SANITISER={}".format(sanitise_mode),
         PROJ_ROOT,
     ]
 
@@ -78,3 +79,19 @@ def install(ctx, target, shared=False):
         cwd=build_dir,
         shell=True,
     )
+
+
+@task
+def sanitise(ctx, mode, shared=False):
+    """
+    Build the tests with different sanitisers
+    """
+    mode_list = ["Address", "Thread", "Undefined", "Leak", "Memory"]
+    if mode not in mode_list:
+        print("Unrecognised sanitiser mode: {}".format(mode))
+        print("Sanitisier mode must be one in {}".format(mode_list))
+        return
+
+    cmake(ctx, clean=True, shared=shared, build="Debug", sanitise_mode=mode)
+
+    cc(ctx, "faabric_tests", shared=shared)

From 949cb34aa9e4d710a5c03bd83b5f1146da8a8350 Mon Sep 17 00:00:00 2001
From: Carlos Segarra <carlos@carlossegarra.com>
Date: Thu, 25 Nov 2021 10:53:54 +0000
Subject: [PATCH 02/48] adding files to diff against

---
 .github/workflows/sanitisers.yml      | 75 ++++++++++++++-------------
 tests/sanitise/expected_addsan.log    |  1 +
 tests/sanitise/expected_leaksan.log   |  1 +
 tests/sanitise/expected_memsan.log    |  1 +
 tests/sanitise/expected_threadsan.log |  1 +
 tests/sanitise/expected_ubsan.log     |  1 +
 6 files changed, 43 insertions(+), 37 deletions(-)
 create mode 100644 tests/sanitise/expected_addsan.log
 create mode 100644 tests/sanitise/expected_leaksan.log
 create mode 100644 tests/sanitise/expected_memsan.log
 create mode 100644 tests/sanitise/expected_threadsan.log
 create mode 100644 tests/sanitise/expected_ubsan.log

diff --git a/.github/workflows/sanitisers.yml b/.github/workflows/sanitisers.yml
index 9b040c16c..6c6844c1f 100644
--- a/.github/workflows/sanitisers.yml
+++ b/.github/workflows/sanitisers.yml
@@ -1,4 +1,4 @@
-name: Nightly Checks
+name: Sanitiser Checks
 
 on:
   push:
@@ -36,14 +36,14 @@ jobs:
       - name: "Build tests with leak sanitising"
         run: inv dev.sanitise Address
       # --- Tests ---
-      - name: "Run tests with leak sanitising on"
-        run: ./bin/faabric_tests > /dev/null 2> ./addsan.out
+      - name: "Run tests with address sanitising on and print to stderr to file"
+        run: ./bin/faabric_tests > /dev/null # 2> ./addsan.log
         working-directory: /build/faabric/static
-      - name: "DELETE ME"
-        run: cat ./addsan.out
-      # --- Compare with expected output
-      - name: "Compare with expected output"
-        run: diff ./addsan.out ./tests/sanitise/expected_addsan.out
+        #       - name: "DELETE ME"
+        #         run: cat ./addsan.log
+        #       # --- Compare with expected output
+        #       - name: "Compare with expected output"
+        #         run: diff ./addsan.log ./tests/sanitise/expected_addsan.log
 
   thread-sanitiser:
     if: github.event.pull_request.draft == false
@@ -73,14 +73,15 @@ jobs:
       - name: "Build tests with thread sanitising"
         run: inv dev.sanitise Thread
       # --- Tests ---
-      - name: "Run tests with thread sanitising on and print stderr"
-        run: ./bin/faabric_tests > /dev/null 2> ./threadsan.out
+      - name: "Run tests with thread sanitising on and print stderr to file"
+        run: ./bin/faabric_tests > /dev/null #2> ./threadsan.log
         working-directory: /build/faabric/static
-      - name: "DELETE ME"
-        run: cat ./threadsan.out
-      # --- Compare with expected output
-      - name: "Compare with expected output"
-        run: diff ./threadsan.out ./tests/sanitise/expected_threadsan.out
+        #       - name: "DELETE ME"
+        #         run: cat ./threadsan.log
+        #         working-directory: /build/faabric/static
+        #       # --- Compare with expected output
+        #       - name: "Compare with expected output"
+        #         run: diff ./threadsan.log ./tests/sanitise/expected_threadsan.log
 
   undefined-sanitiser:
     if: github.event.pull_request.draft == false
@@ -110,14 +111,14 @@ jobs:
       - name: "Build tests with undefined sanitising"
         run: inv dev.sanitise Undefined
       # --- Tests ---
-      - name: "Run tests with undefined sanitising on"
-        run: ./bin/faabric_tests > /dev/null 2> ./ubsan.out
+      - name: "Run tests with undefined sanitising on and print stderr to file"
+        run: ./bin/faabric_tests > /dev/null # 2> ./ubsan.log
         working-directory: /build/faabric/static
-      - name: "DELETE ME"
-        run: cat ./ubsan.out
-      # --- Compare with expected output
-      - name: "Compare with expected output"
-        run: diff ./ubsan.out ./tests/sanitise/expected_ubsan.out
+        #       - name: "DELETE ME"
+        #         run: cat ./ubsan.log
+        #       # --- Compare with expected output
+        #       - name: "Compare with expected output"
+        #         run: diff ./ubsan.log ./tests/sanitise/expected_ubsan.log
 
   leak-sanitiser:
     if: github.event.pull_request.draft == false
@@ -147,14 +148,14 @@ jobs:
       - name: "Build tests with leak sanitising"
         run: inv dev.sanitise Leak
       # --- Tests ---
-      - name: "Run tests with leak sanitising on"
-        run: ./bin/faabric_tests > /dev/null 2> ./leaksan.out
+      - name: "Run tests with leak sanitising on and print stderr to file"
+        run: ./bin/faabric_tests > /dev/null # 2> ./leaksan.log
         working-directory: /build/faabric/static
-      - name: "DELETE ME"
-        run: cat ./leaksan.out
-      # --- Compare with expected output
-      - name: "Compare with expected output"
-        run: diff ./leaksan.out ./tests/sanitise/expected_leaksan.out
+        #       - name: "DELETE ME"
+        #         run: cat ./leaksan.log
+        #       # --- Compare with expected output
+        #       - name: "Compare with expected output"
+        #         run: diff ./leaksan.log ./tests/sanitise/expected_leaksan.log
 
   memory-sanitiser:
     if: github.event.pull_request.draft == false
@@ -181,14 +182,14 @@ jobs:
       - name: "Ping redis"
         run: redis-cli -h redis ping
       # --- Build with thread sanitising
-      - name: "Build tests with leak sanitising"
+      - name: "Build tests with memory sanitising"
         run: inv dev.sanitise Memory
       # --- Tests ---
-      - name: "Run tests with leak sanitising on"
-        run: ./bin/faabric_tests > /dev/null 2> ./memsan.out
+      - name: "Run tests with memory sanitising on and print stderr to file"
+        run: ./bin/faabric_tests > /dev/null # 2> ./memsan.log
         working-directory: /build/faabric/static
-      - name: "DELETE ME"
-        run: cat ./memsan.out
-      # --- Compare with expected output
-      - name: "Compare with expected output"
-        run: diff ./memsan.out ./tests/sanitise/expected_memsan.out
+        #       - name: "DELETE ME"
+        #         run: cat ./memsan.log
+        #       # --- Compare with expected output
+        #       - name: "Compare with expected output"
+        #         run: diff ./memsan.log ./tests/sanitise/expected_memsan.log
diff --git a/tests/sanitise/expected_addsan.log b/tests/sanitise/expected_addsan.log
new file mode 100644
index 000000000..3b94f9157
--- /dev/null
+++ b/tests/sanitise/expected_addsan.log
@@ -0,0 +1 @@
+Placeholder
diff --git a/tests/sanitise/expected_leaksan.log b/tests/sanitise/expected_leaksan.log
new file mode 100644
index 000000000..3b94f9157
--- /dev/null
+++ b/tests/sanitise/expected_leaksan.log
@@ -0,0 +1 @@
+Placeholder
diff --git a/tests/sanitise/expected_memsan.log b/tests/sanitise/expected_memsan.log
new file mode 100644
index 000000000..3b94f9157
--- /dev/null
+++ b/tests/sanitise/expected_memsan.log
@@ -0,0 +1 @@
+Placeholder
diff --git a/tests/sanitise/expected_threadsan.log b/tests/sanitise/expected_threadsan.log
new file mode 100644
index 000000000..3b94f9157
--- /dev/null
+++ b/tests/sanitise/expected_threadsan.log
@@ -0,0 +1 @@
+Placeholder
diff --git a/tests/sanitise/expected_ubsan.log b/tests/sanitise/expected_ubsan.log
new file mode 100644
index 000000000..3b94f9157
--- /dev/null
+++ b/tests/sanitise/expected_ubsan.log
@@ -0,0 +1 @@
+Placeholder

From 1e9885206d6aa98518f5fad9b68efc5382755007 Mon Sep 17 00:00:00 2001
From: Carlos Segarra <carlos@carlossegarra.com>
Date: Thu, 25 Nov 2021 12:04:29 +0000
Subject: [PATCH 03/48] cleaning up workflow file

---
 .github/workflows/sanitisers.yml      | 89 ++++++++++-----------------
 tasks/dev.py                          |  4 +-
 tests/sanitise/expected_addsan.log    |  1 -
 tests/sanitise/expected_leaksan.log   |  1 -
 tests/sanitise/expected_memsan.log    |  1 -
 tests/sanitise/expected_threadsan.log |  1 -
 tests/sanitise/expected_ubsan.log     |  1 -
 7 files changed, 36 insertions(+), 62 deletions(-)
 delete mode 100644 tests/sanitise/expected_addsan.log
 delete mode 100644 tests/sanitise/expected_leaksan.log
 delete mode 100644 tests/sanitise/expected_memsan.log
 delete mode 100644 tests/sanitise/expected_threadsan.log
 delete mode 100644 tests/sanitise/expected_ubsan.log

diff --git a/.github/workflows/sanitisers.yml b/.github/workflows/sanitisers.yml
index 6c6844c1f..65b744403 100644
--- a/.github/workflows/sanitisers.yml
+++ b/.github/workflows/sanitisers.yml
@@ -8,7 +8,7 @@ on:
     types: [opened, synchronize, reopened, ready_for_review]
 
 jobs:
-  address-sanitiser:
+  conan-cache:
     if: github.event.pull_request.draft == false
     runs-on: ubuntu-latest
     env:
@@ -32,21 +32,13 @@ jobs:
       # --- Set-up ---
       - name: "Ping redis"
         run: redis-cli -h redis ping
-      # --- Build with thread sanitising
-      - name: "Build tests with leak sanitising"
-        run: inv dev.sanitise Address
-      # --- Tests ---
-      - name: "Run tests with address sanitising on and print to stderr to file"
-        run: ./bin/faabric_tests > /dev/null # 2> ./addsan.log
-        working-directory: /build/faabric/static
-        #       - name: "DELETE ME"
-        #         run: cat ./addsan.log
-        #       # --- Compare with expected output
-        #       - name: "Compare with expected output"
-        #         run: diff ./addsan.log ./tests/sanitise/expected_addsan.log
+      # --- Build dependencies
+      - name: "Build dependencies to be shared by all sanitiser runs"
+        run: inv dev.cmake -b Debug
 
-  thread-sanitiser:
+  address-sanitiser:
     if: github.event.pull_request.draft == false
+    needs: [conan-cache]
     runs-on: ubuntu-latest
     env:
       HOST_TYPE: ci
@@ -70,21 +62,17 @@ jobs:
       - name: "Ping redis"
         run: redis-cli -h redis ping
       # --- Build with thread sanitising
-      - name: "Build tests with thread sanitising"
-        run: inv dev.sanitise Thread
-      # --- Tests ---
-      - name: "Run tests with thread sanitising on and print stderr to file"
-        run: ./bin/faabric_tests > /dev/null #2> ./threadsan.log
+      - name: "Build tests with address sanitising"
+        run: inv dev.sanitise Address --clean=False
+      - name: "Run tests with address sanitising on and print to stderr to file"
+        run: ./bin/faabric_tests > /dev/null
         working-directory: /build/faabric/static
-        #       - name: "DELETE ME"
-        #         run: cat ./threadsan.log
-        #         working-directory: /build/faabric/static
-        #       # --- Compare with expected output
-        #       - name: "Compare with expected output"
-        #         run: diff ./threadsan.log ./tests/sanitise/expected_threadsan.log
+        env:
+          ADSAN_OPTIONS: "verbosity=1:halt_on_error=1"
 
-  undefined-sanitiser:
+  thread-sanitiser:
     if: github.event.pull_request.draft == false
+    needs: [conan-cache]
     runs-on: ubuntu-latest
     env:
       HOST_TYPE: ci
@@ -108,20 +96,18 @@ jobs:
       - name: "Ping redis"
         run: redis-cli -h redis ping
       # --- Build with thread sanitising
-      - name: "Build tests with undefined sanitising"
-        run: inv dev.sanitise Undefined
+      - name: "Build tests with thread sanitising"
+        run: inv dev.sanitise Thread --clean=False
       # --- Tests ---
-      - name: "Run tests with undefined sanitising on and print stderr to file"
-        run: ./bin/faabric_tests > /dev/null # 2> ./ubsan.log
+      - name: "Run tests with thread sanitising on and print stderr to file"
+        run: ./bin/faabric_tests > /dev/null
         working-directory: /build/faabric/static
-        #       - name: "DELETE ME"
-        #         run: cat ./ubsan.log
-        #       # --- Compare with expected output
-        #       - name: "Compare with expected output"
-        #         run: diff ./ubsan.log ./tests/sanitise/expected_ubsan.log
+        env:
+          TSAN_OPTIONS: "verbosity=1:halt_on_error=1"
 
-  leak-sanitiser:
+  undefined-sanitiser:
     if: github.event.pull_request.draft == false
+    needs: [conan-cache]
     runs-on: ubuntu-latest
     env:
       HOST_TYPE: ci
@@ -145,20 +131,18 @@ jobs:
       - name: "Ping redis"
         run: redis-cli -h redis ping
       # --- Build with thread sanitising
-      - name: "Build tests with leak sanitising"
-        run: inv dev.sanitise Leak
+      - name: "Build tests with undefined sanitising"
+        run: inv dev.sanitise Undefined --clean=False
       # --- Tests ---
-      - name: "Run tests with leak sanitising on and print stderr to file"
-        run: ./bin/faabric_tests > /dev/null # 2> ./leaksan.log
+      - name: "Run tests with undefined sanitising on and print stderr to file"
+        run: ./bin/faabric_tests > /dev/null
         working-directory: /build/faabric/static
-        #       - name: "DELETE ME"
-        #         run: cat ./leaksan.log
-        #       # --- Compare with expected output
-        #       - name: "Compare with expected output"
-        #         run: diff ./leaksan.log ./tests/sanitise/expected_leaksan.log
+        env:
+          UBSAN_OPTIONS: "print_stacktrace=1:halt_on_error=1"
 
-  memory-sanitiser:
+  leak-sanitiser:
     if: github.event.pull_request.draft == false
+    needs: [conan-cache]
     runs-on: ubuntu-latest
     env:
       HOST_TYPE: ci
@@ -182,14 +166,9 @@ jobs:
       - name: "Ping redis"
         run: redis-cli -h redis ping
       # --- Build with thread sanitising
-      - name: "Build tests with memory sanitising"
-        run: inv dev.sanitise Memory
+      - name: "Build tests with leak sanitising"
+        run: inv dev.sanitise Leak --clean=False
       # --- Tests ---
-      - name: "Run tests with memory sanitising on and print stderr to file"
-        run: ./bin/faabric_tests > /dev/null # 2> ./memsan.log
+      - name: "Run tests with leak sanitising on and print stderr to file"
+        run: ./bin/faabric_tests > /dev/null
         working-directory: /build/faabric/static
-        #       - name: "DELETE ME"
-        #         run: cat ./memsan.log
-        #       # --- Compare with expected output
-        #       - name: "Compare with expected output"
-        #         run: diff ./memsan.log ./tests/sanitise/expected_memsan.log
diff --git a/tasks/dev.py b/tasks/dev.py
index ed2099ba5..315b58314 100644
--- a/tasks/dev.py
+++ b/tasks/dev.py
@@ -82,7 +82,7 @@ def install(ctx, target, shared=False):
 
 
 @task
-def sanitise(ctx, mode, shared=False):
+def sanitise(ctx, mode, clean=True, shared=False):
     """
     Build the tests with different sanitisers
     """
@@ -92,6 +92,6 @@ def sanitise(ctx, mode, shared=False):
         print("Sanitisier mode must be one in {}".format(mode_list))
         return
 
-    cmake(ctx, clean=True, shared=shared, build="Debug", sanitise_mode=mode)
+    cmake(ctx, clean=clean, shared=shared, build="Debug", sanitise_mode=mode)
 
     cc(ctx, "faabric_tests", shared=shared)
diff --git a/tests/sanitise/expected_addsan.log b/tests/sanitise/expected_addsan.log
deleted file mode 100644
index 3b94f9157..000000000
--- a/tests/sanitise/expected_addsan.log
+++ /dev/null
@@ -1 +0,0 @@
-Placeholder
diff --git a/tests/sanitise/expected_leaksan.log b/tests/sanitise/expected_leaksan.log
deleted file mode 100644
index 3b94f9157..000000000
--- a/tests/sanitise/expected_leaksan.log
+++ /dev/null
@@ -1 +0,0 @@
-Placeholder
diff --git a/tests/sanitise/expected_memsan.log b/tests/sanitise/expected_memsan.log
deleted file mode 100644
index 3b94f9157..000000000
--- a/tests/sanitise/expected_memsan.log
+++ /dev/null
@@ -1 +0,0 @@
-Placeholder
diff --git a/tests/sanitise/expected_threadsan.log b/tests/sanitise/expected_threadsan.log
deleted file mode 100644
index 3b94f9157..000000000
--- a/tests/sanitise/expected_threadsan.log
+++ /dev/null
@@ -1 +0,0 @@
-Placeholder
diff --git a/tests/sanitise/expected_ubsan.log b/tests/sanitise/expected_ubsan.log
deleted file mode 100644
index 3b94f9157..000000000
--- a/tests/sanitise/expected_ubsan.log
+++ /dev/null
@@ -1 +0,0 @@
-Placeholder

From aa45f750911b3c1b23d894d06d266ff504065d23 Mon Sep 17 00:00:00 2001
From: Carlos Segarra <carlos@carlossegarra.com>
Date: Thu, 25 Nov 2021 16:35:31 +0000
Subject: [PATCH 04/48] run only on workflow dispatch

---
 .github/workflows/sanitisers.yml | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/.github/workflows/sanitisers.yml b/.github/workflows/sanitisers.yml
index 65b744403..6914e1c6a 100644
--- a/.github/workflows/sanitisers.yml
+++ b/.github/workflows/sanitisers.yml
@@ -1,11 +1,6 @@
 name: Sanitiser Checks
 
-on:
-  push:
-    branches: [master]
-  pull_request:
-    branches: [master]
-    types: [opened, synchronize, reopened, ready_for_review]
+on: workflow_dispatch
 
 jobs:
   conan-cache:

From 7e9a4f2038d0a60d49f35068e449ad424afe38d7 Mon Sep 17 00:00:00 2001
From: Carlos Segarra <carlos@carlossegarra.com>
Date: Thu, 25 Nov 2021 16:43:12 +0000
Subject: [PATCH 05/48] properly use caching

---
 .github/workflows/sanitisers.yml | 33 +++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/sanitisers.yml b/.github/workflows/sanitisers.yml
index 6914e1c6a..2c7b8c4ba 100644
--- a/.github/workflows/sanitisers.yml
+++ b/.github/workflows/sanitisers.yml
@@ -1,6 +1,12 @@
 name: Sanitiser Checks
 
-on: workflow_dispatch
+# on: workflow_dispatch
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: [master]
+    types: [opened, synchronize, reopened, ready_for_review]
 
 jobs:
   conan-cache:
@@ -30,6 +36,11 @@ jobs:
       # --- Build dependencies
       - name: "Build dependencies to be shared by all sanitiser runs"
         run: inv dev.cmake -b Debug
+      - name: "Upload built CMake dependencies"
+        uses: actions/upload-artifact@v2
+        with:
+          name: cmake-cache
+          path: /build/faabric
 
   address-sanitiser:
     if: github.event.pull_request.draft == false
@@ -57,6 +68,11 @@ jobs:
       - name: "Ping redis"
         run: redis-cli -h redis ping
       # --- Build with thread sanitising
+      - name: "Download CMake dependencies"
+        uses: actions/download-artifact@v2
+        with:
+          name: cmake-cache
+          path: /build/faabric
       - name: "Build tests with address sanitising"
         run: inv dev.sanitise Address --clean=False
       - name: "Run tests with address sanitising on and print to stderr to file"
@@ -91,6 +107,11 @@ jobs:
       - name: "Ping redis"
         run: redis-cli -h redis ping
       # --- Build with thread sanitising
+      - name: "Download CMake dependencies"
+        uses: actions/download-artifact@v2
+        with:
+          name: cmake-cache
+          path: /build/faabric
       - name: "Build tests with thread sanitising"
         run: inv dev.sanitise Thread --clean=False
       # --- Tests ---
@@ -126,6 +147,11 @@ jobs:
       - name: "Ping redis"
         run: redis-cli -h redis ping
       # --- Build with thread sanitising
+      - name: "Download CMake dependencies"
+        uses: actions/download-artifact@v2
+        with:
+          name: cmake-cache
+          path: /build/faabric
       - name: "Build tests with undefined sanitising"
         run: inv dev.sanitise Undefined --clean=False
       # --- Tests ---
@@ -161,6 +187,11 @@ jobs:
       - name: "Ping redis"
         run: redis-cli -h redis ping
       # --- Build with thread sanitising
+      - name: "Download CMake dependencies"
+        uses: actions/download-artifact@v2
+        with:
+          name: cmake-cache
+          path: /build/faabric
       - name: "Build tests with leak sanitising"
         run: inv dev.sanitise Leak --clean=False
       # --- Tests ---

From 1c8cc2280ac55fed733369c2106ef408bf6ea629 Mon Sep 17 00:00:00 2001
From: Carlos Segarra <carlos@carlossegarra.com>
Date: Thu, 25 Nov 2021 16:50:26 +0000
Subject: [PATCH 06/48] setting flag as default to false

---
 .github/workflows/sanitisers.yml |  8 ++++----
 tasks/dev.py                     | 10 ++++++++--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/sanitisers.yml b/.github/workflows/sanitisers.yml
index 2c7b8c4ba..beaefa87f 100644
--- a/.github/workflows/sanitisers.yml
+++ b/.github/workflows/sanitisers.yml
@@ -74,7 +74,7 @@ jobs:
           name: cmake-cache
           path: /build/faabric
       - name: "Build tests with address sanitising"
-        run: inv dev.sanitise Address --clean=False
+        run: inv dev.sanitise Address --noclean
       - name: "Run tests with address sanitising on and print to stderr to file"
         run: ./bin/faabric_tests > /dev/null
         working-directory: /build/faabric/static
@@ -113,7 +113,7 @@ jobs:
           name: cmake-cache
           path: /build/faabric
       - name: "Build tests with thread sanitising"
-        run: inv dev.sanitise Thread --clean=False
+        run: inv dev.sanitise Thread --noclean
       # --- Tests ---
       - name: "Run tests with thread sanitising on and print stderr to file"
         run: ./bin/faabric_tests > /dev/null
@@ -153,7 +153,7 @@ jobs:
           name: cmake-cache
           path: /build/faabric
       - name: "Build tests with undefined sanitising"
-        run: inv dev.sanitise Undefined --clean=False
+        run: inv dev.sanitise Undefined --noclean
       # --- Tests ---
       - name: "Run tests with undefined sanitising on and print stderr to file"
         run: ./bin/faabric_tests > /dev/null
@@ -193,7 +193,7 @@ jobs:
           name: cmake-cache
           path: /build/faabric
       - name: "Build tests with leak sanitising"
-        run: inv dev.sanitise Leak --clean=False
+        run: inv dev.sanitise Leak --noclean
       # --- Tests ---
       - name: "Run tests with leak sanitising on and print stderr to file"
         run: ./bin/faabric_tests > /dev/null
diff --git a/tasks/dev.py b/tasks/dev.py
index 315b58314..81c6827d6 100644
--- a/tasks/dev.py
+++ b/tasks/dev.py
@@ -82,7 +82,7 @@ def install(ctx, target, shared=False):
 
 
 @task
-def sanitise(ctx, mode, clean=True, shared=False):
+def sanitise(ctx, mode, noclean=False, shared=False):
     """
     Build the tests with different sanitisers
     """
@@ -92,6 +92,12 @@ def sanitise(ctx, mode, clean=True, shared=False):
         print("Sanitisier mode must be one in {}".format(mode_list))
         return
 
-    cmake(ctx, clean=clean, shared=shared, build="Debug", sanitise_mode=mode)
+    cmake(
+        ctx,
+        clean=(not noclean),
+        shared=shared,
+        build="Debug",
+        sanitise_mode=mode,
+    )
 
     cc(ctx, "faabric_tests", shared=shared)

From 51b7facee2757eb399566b8f0790db95da75d24f Mon Sep 17 00:00:00 2001
From: Carlos Segarra <carlos@carlossegarra.com>
Date: Fri, 26 Nov 2021 09:52:21 +0000
Subject: [PATCH 07/48] cache the right folder

---
 .github/workflows/sanitisers.yml | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/sanitisers.yml b/.github/workflows/sanitisers.yml
index beaefa87f..265e00c7b 100644
--- a/.github/workflows/sanitisers.yml
+++ b/.github/workflows/sanitisers.yml
@@ -36,11 +36,11 @@ jobs:
       # --- Build dependencies
       - name: "Build dependencies to be shared by all sanitiser runs"
         run: inv dev.cmake -b Debug
-      - name: "Upload built CMake dependencies"
+      - name: "Upload built Conan dependencies"
         uses: actions/upload-artifact@v2
         with:
-          name: cmake-cache
-          path: /build/faabric
+          name: conan-cache
+          path: '~/.conan'
 
   address-sanitiser:
     if: github.event.pull_request.draft == false
@@ -68,11 +68,11 @@ jobs:
       - name: "Ping redis"
         run: redis-cli -h redis ping
       # --- Build with thread sanitising
-      - name: "Download CMake dependencies"
+      - name: "Download Conan dependencies"
         uses: actions/download-artifact@v2
         with:
-          name: cmake-cache
-          path: /build/faabric
+          name: conan-cache
+          path: /root/.conan
       - name: "Build tests with address sanitising"
         run: inv dev.sanitise Address --noclean
       - name: "Run tests with address sanitising on and print to stderr to file"
@@ -107,11 +107,11 @@ jobs:
       - name: "Ping redis"
         run: redis-cli -h redis ping
       # --- Build with thread sanitising
-      - name: "Download CMake dependencies"
+      - name: "Download Conan dependencies"
         uses: actions/download-artifact@v2
         with:
-          name: cmake-cache
-          path: /build/faabric
+          name: conan-cache
+          path: /root/.conan
       - name: "Build tests with thread sanitising"
         run: inv dev.sanitise Thread --noclean
       # --- Tests ---
@@ -147,11 +147,11 @@ jobs:
       - name: "Ping redis"
         run: redis-cli -h redis ping
       # --- Build with thread sanitising
-      - name: "Download CMake dependencies"
+      - name: "Download Conan dependencies"
         uses: actions/download-artifact@v2
         with:
-          name: cmake-cache
-          path: /build/faabric
+          name: conan-cache
+          path: /root/.conan
       - name: "Build tests with undefined sanitising"
         run: inv dev.sanitise Undefined --noclean
       # --- Tests ---
@@ -187,11 +187,11 @@ jobs:
       - name: "Ping redis"
         run: redis-cli -h redis ping
       # --- Build with thread sanitising
-      - name: "Download CMake dependencies"
+      - name: "Download Conan dependencies"
         uses: actions/download-artifact@v2
         with:
-          name: cmake-cache
-          path: /build/faabric
+          name: conan-cache
+          path: /root/.conan
       - name: "Build tests with leak sanitising"
         run: inv dev.sanitise Leak --noclean
       # --- Tests ---

From 43b71d4ba054730bd50649e408f14495c07b924f Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 11:17:57 +0000
Subject: [PATCH 08/48] Fix ASAN_OPTIONS typo

---
 .github/workflows/sanitisers.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/sanitisers.yml b/.github/workflows/sanitisers.yml
index 265e00c7b..dd78711d8 100644
--- a/.github/workflows/sanitisers.yml
+++ b/.github/workflows/sanitisers.yml
@@ -79,7 +79,7 @@ jobs:
         run: ./bin/faabric_tests > /dev/null
         working-directory: /build/faabric/static
         env:
-          ADSAN_OPTIONS: "verbosity=1:halt_on_error=1"
+          ASAN_OPTIONS: "verbosity=1:halt_on_error=1"
 
   thread-sanitiser:
     if: github.event.pull_request.draft == false

From 64bcf5ef9698fc4b0928fecbe4a3eb1478f2928d Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 11:18:17 +0000
Subject: [PATCH 09/48] Try using cachev2 instead of artifact for conan
 dependencies

---
 .github/workflows/sanitisers.yml | 75 +++++++++++++++++++++-----------
 1 file changed, 50 insertions(+), 25 deletions(-)

diff --git a/.github/workflows/sanitisers.yml b/.github/workflows/sanitisers.yml
index dd78711d8..dffb556b5 100644
--- a/.github/workflows/sanitisers.yml
+++ b/.github/workflows/sanitisers.yml
@@ -33,14 +33,19 @@ jobs:
       # --- Set-up ---
       - name: "Ping redis"
         run: redis-cli -h redis ping
+      # --- Cache based on Conan version and ExternalProjects cmake source
+      - name: Get Conan version
+        id: get-conan-version
+        run: |
+          echo "::set-output name=conan-version::$(conan --version | tr -d '[:alpha:] ')"
+        shell: bash
+      - uses: actions/cache@v2
+        with:
+          path: '~/.conan'
+          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('cmake/ExternalProjects.cmake') }}
       # --- Build dependencies
       - name: "Build dependencies to be shared by all sanitiser runs"
         run: inv dev.cmake -b Debug
-      - name: "Upload built Conan dependencies"
-        uses: actions/upload-artifact@v2
-        with:
-          name: conan-cache
-          path: '~/.conan'
 
   address-sanitiser:
     if: github.event.pull_request.draft == false
@@ -67,12 +72,17 @@ jobs:
       # --- Set-up ---
       - name: "Ping redis"
         run: redis-cli -h redis ping
-      # --- Build with thread sanitising
-      - name: "Download Conan dependencies"
-        uses: actions/download-artifact@v2
+      # --- Cache based on Conan version and ExternalProjects cmake source
+      - name: Get Conan version
+        id: get-conan-version
+        run: |
+          echo "::set-output name=conan-version::$(conan --version | tr -d '[:alpha:] ')"
+        shell: bash
+      - uses: actions/cache@v2
         with:
-          name: conan-cache
-          path: /root/.conan
+          path: '~/.conan'
+          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('cmake/ExternalProjects.cmake') }}
+      # --- Build with thread sanitising
       - name: "Build tests with address sanitising"
         run: inv dev.sanitise Address --noclean
       - name: "Run tests with address sanitising on and print to stderr to file"
@@ -106,12 +116,17 @@ jobs:
       # --- Set-up ---
       - name: "Ping redis"
         run: redis-cli -h redis ping
-      # --- Build with thread sanitising
-      - name: "Download Conan dependencies"
-        uses: actions/download-artifact@v2
+      # --- Cache based on Conan version and ExternalProjects cmake source
+      - name: Get Conan version
+        id: get-conan-version
+        run: |
+          echo "::set-output name=conan-version::$(conan --version | tr -d '[:alpha:] ')"
+        shell: bash
+      - uses: actions/cache@v2
         with:
-          name: conan-cache
-          path: /root/.conan
+          path: '~/.conan'
+          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('cmake/ExternalProjects.cmake') }}
+      # --- Build with thread sanitising
       - name: "Build tests with thread sanitising"
         run: inv dev.sanitise Thread --noclean
       # --- Tests ---
@@ -146,12 +161,17 @@ jobs:
       # --- Set-up ---
       - name: "Ping redis"
         run: redis-cli -h redis ping
-      # --- Build with thread sanitising
-      - name: "Download Conan dependencies"
-        uses: actions/download-artifact@v2
+      # --- Cache based on Conan version and ExternalProjects cmake source
+      - name: Get Conan version
+        id: get-conan-version
+        run: |
+          echo "::set-output name=conan-version::$(conan --version | tr -d '[:alpha:] ')"
+        shell: bash
+      - uses: actions/cache@v2
         with:
-          name: conan-cache
-          path: /root/.conan
+          path: '~/.conan'
+          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('cmake/ExternalProjects.cmake') }}
+      # --- Build with thread sanitising
       - name: "Build tests with undefined sanitising"
         run: inv dev.sanitise Undefined --noclean
       # --- Tests ---
@@ -186,12 +206,17 @@ jobs:
       # --- Set-up ---
       - name: "Ping redis"
         run: redis-cli -h redis ping
-      # --- Build with thread sanitising
-      - name: "Download Conan dependencies"
-        uses: actions/download-artifact@v2
+      # --- Cache based on Conan version and ExternalProjects cmake source
+      - name: Get Conan version
+        id: get-conan-version
+        run: |
+          echo "::set-output name=conan-version::$(conan --version | tr -d '[:alpha:] ')"
+        shell: bash
+      - uses: actions/cache@v2
         with:
-          name: conan-cache
-          path: /root/.conan
+          path: '~/.conan'
+          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('cmake/ExternalProjects.cmake') }}
+      # --- Build with thread sanitising
       - name: "Build tests with leak sanitising"
         run: inv dev.sanitise Leak --noclean
       # --- Tests ---

From b95e2b67cd3a1ee8f6e6e2382baaade539fa87b8 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 11:49:28 +0000
Subject: [PATCH 10/48] Use a local conan cache folder when using the cli, just
 like in faasm

---
 .gitignore         | 2 +-
 docker-compose.yml | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 94280d5c0..da0bc95d5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
 build/
-
+conan-cache/
 # Clang
 .clangd
 compile_commands.json
diff --git a/docker-compose.yml b/docker-compose.yml
index 48c61fbb7..fdd6900c4 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -11,6 +11,7 @@ services:
       - /usr/bin/docker:/usr/bin/docker
       - ./:/code/faabric
       - ./build:/build/faabric
+      - ./conan-cache/:/root/.conan
     working_dir: /code/faabric
     stdin_open: true
     tty: true
@@ -27,6 +28,7 @@ services:
     volumes:
       - ./:/code/faabric
       - ./build:/build/faabric
+      - ./conan-cache/:/root/.conan
     working_dir: /build/faabric/static
     environment:
       - LOG_LEVEL=debug

From a5b8c51da6d44d085131d24351389d4c304902e6 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 11:59:10 +0000
Subject: [PATCH 11/48] UBSan fix: Make sure created int pointer are 4-aligned

---
 tests/test/snapshot/test_snapshot_client_server.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test/snapshot/test_snapshot_client_server.cpp b/tests/test/snapshot/test_snapshot_client_server.cpp
index bd0764f6e..cd52fd883 100644
--- a/tests/test/snapshot/test_snapshot_client_server.cpp
+++ b/tests/test/snapshot/test_snapshot_client_server.cpp
@@ -188,7 +188,7 @@ TEST_CASE_METHOD(SnapshotClientServerFixture,
     SnapshotData snap = takeSnapshot(snapKey, 5, false);
 
     // Set up a couple of ints in the snapshot
-    int offsetA1 = 5;
+    int offsetA1 = 8;
     int offsetA2 = 2 * HOST_PAGE_SIZE;
     int baseA1 = 25;
     int baseA2 = 60;
@@ -241,7 +241,7 @@ TEST_CASE_METHOD(SnapshotClientServerFixture,
     std::string snapKey = std::to_string(generateGid());
     SnapshotData snap = takeSnapshot(snapKey, 5, false);
 
-    int offset = 5;
+    int offset = 8;
     std::vector<uint8_t> originalData;
     std::vector<uint8_t> diffData;
     std::vector<uint8_t> expectedData;

From 815c4ec06fae244b58d510533701c1369a929782 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 13:10:49 +0000
Subject: [PATCH 12/48] ASan,TSan: Latch destruction race-condition fix

---
 include/faabric/util/latch.h | 3 ++-
 src/util/latch.cpp           | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/faabric/util/latch.h b/include/faabric/util/latch.h
index b87419fe4..9f91a8132 100644
--- a/include/faabric/util/latch.h
+++ b/include/faabric/util/latch.h
@@ -1,13 +1,14 @@
 #pragma once
 
 #include <condition_variable>
+#include <memory>
 #include <mutex>
 
 namespace faabric::util {
 
 #define DEFAULT_LATCH_TIMEOUT_MS 10000
 
-class Latch
+class Latch : public std::enable_shared_from_this<Latch>
 {
   public:
     // WARNING: this latch must be shared between threads using a shared
diff --git a/src/util/latch.cpp b/src/util/latch.cpp
index c44962beb..bce41dc4c 100644
--- a/src/util/latch.cpp
+++ b/src/util/latch.cpp
@@ -16,6 +16,8 @@ Latch::Latch(int countIn, int timeoutMsIn)
 
 void Latch::wait()
 {
+    // Keep the this shared_ptr alive to prevent heap-use-after-free
+    std::shared_ptr<Latch> _keepMeAlive = shared_from_this();
     UniqueLock lock(mx);
 
     waiters++;

From f5e077b0b857efd48cf820fd1ee0cd640affefb0 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 13:13:56 +0000
Subject: [PATCH 13/48] ASan: MPI World test buffer too small

---
 tests/test/scheduler/test_mpi_world.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test/scheduler/test_mpi_world.cpp b/tests/test/scheduler/test_mpi_world.cpp
index bcbf53973..b250d5130 100644
--- a/tests/test/scheduler/test_mpi_world.cpp
+++ b/tests/test/scheduler/test_mpi_world.cpp
@@ -69,7 +69,7 @@ TEST_CASE_METHOD(MpiBaseTestFixture, "Test cartesian communicator", "[mpi]")
     int worldSize;
     int maxDims = 3;
     std::vector<int> dims(maxDims);
-    std::vector<int> periods(2, 1);
+    std::vector<int> periods(maxDims, 1);
     std::vector<std::vector<int>> expectedShift;
     std::vector<std::vector<int>> expectedCoords;
 

From be1b08d9b1d3557b9590ecd04b862cf28c0c9a86 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 13:29:34 +0000
Subject: [PATCH 14/48] ASan: fix mismatched new[]-delete

---
 include/faabric/state/InMemoryStateKeyValue.h | 6 +++---
 src/state/InMemoryStateKeyValue.cpp           | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/faabric/state/InMemoryStateKeyValue.h b/include/faabric/state/InMemoryStateKeyValue.h
index dc69e6166..856b6d3c5 100644
--- a/include/faabric/state/InMemoryStateKeyValue.h
+++ b/include/faabric/state/InMemoryStateKeyValue.h
@@ -18,13 +18,13 @@ enum InMemoryStateKeyStatus
 class AppendedInMemoryState
 {
   public:
-    AppendedInMemoryState(size_t lengthIn, uint8_t* dataIn)
+    AppendedInMemoryState(size_t lengthIn, std::unique_ptr<uint8_t[]>&& dataIn)
       : length(lengthIn)
-      , data(dataIn)
+      , data(std::move(dataIn))
     {}
 
     size_t length;
-    std::unique_ptr<uint8_t> data;
+    std::unique_ptr<uint8_t[]> data;
 };
 
 class InMemoryStateKeyValue final : public StateKeyValue
diff --git a/src/state/InMemoryStateKeyValue.cpp b/src/state/InMemoryStateKeyValue.cpp
index a73703cdf..a503528bc 100644
--- a/src/state/InMemoryStateKeyValue.cpp
+++ b/src/state/InMemoryStateKeyValue.cpp
@@ -159,11 +159,11 @@ void InMemoryStateKeyValue::appendToRemote(const uint8_t* data, size_t length)
 {
     if (status == InMemoryStateKeyStatus::MASTER) {
         // Create new memory region to hold data
-        auto dataCopy = new uint8_t[length];
-        std::copy(data, data + length, dataCopy);
+        auto dataCopy = std::make_unique<uint8_t[]>(length);
+        std::copy(data, data + length, dataCopy.get());
 
         // Add to list
-        appendedData.emplace_back(length, dataCopy);
+        appendedData.emplace_back(length, std::move(dataCopy));
     } else {
         StateClient cli(user, key, masterIP);
         cli.append(data, length);

From 45bf9786443d40bc4fe8ad80ba215ed0fa663677 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 13:31:30 +0000
Subject: [PATCH 15/48] ASan: out-of-bound read, this was a simple copy-paste
 error

---
 tests/test/state/test_state.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test/state/test_state.cpp b/tests/test/state/test_state.cpp
index af9e7aa3b..8f0fcc715 100644
--- a/tests/test/state/test_state.cpp
+++ b/tests/test/state/test_state.cpp
@@ -935,8 +935,8 @@ TEST_CASE_METHOD(
 
     std::vector<uint8_t> actualA(lenA, 0);
     std::vector<uint8_t> expectedA(lenA, 1);
-    std::vector<uint8_t> actualB(lenA, 0);
-    std::vector<uint8_t> expectedB(lenA, 1);
+    std::vector<uint8_t> actualB(lenB, 0);
+    std::vector<uint8_t> expectedB(lenB, 1);
 
     const std::shared_ptr<state::StateKeyValue>& localKv = getLocalKv();
     localKv->getChunk(offsetA, actualA.data(), lenA);

From 11c3a775aefccc8b13e9a385d8912345490ab3e6 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 13:41:40 +0000
Subject: [PATCH 16/48] ASan: Copy temporary strings in RapidJSON

---
 src/util/json.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/util/json.cpp b/src/util/json.cpp
index 1278fda4a..7d0fe34af 100644
--- a/src/util/json.cpp
+++ b/src/util/json.cpp
@@ -201,8 +201,10 @@ std::string messageToJson(const faabric::Message& msg)
             }
 
             std::string out = ss.str();
+            // As out is a temporary, pass the allocator to Value() to make a
+            // copy
             d.AddMember(
-              "exec_graph_detail", Value(out.c_str(), out.size()).Move(), a);
+              "exec_graph_detail", Value(out.c_str(), out.size(), a).Move(), a);
         }
 
         if (msg.intexecgraphdetails_size() > 0) {
@@ -218,11 +220,11 @@ std::string messageToJson(const faabric::Message& msg)
             }
 
             std::string out = ss.str();
-
-            // Need to create a value (instead of move) as the string's scope
-            // is smaller than the document's one
-            Value value = Value(out.c_str(), out.size(), a);
-            d.AddMember("int_exec_graph_detail", value, a);
+            // As out is a temporary, pass the allocator to Value() to make a
+            // copy
+            d.AddMember("int_exec_graph_detail",
+                        Value(out.c_str(), out.size(), a).Move(),
+                        a);
         }
     }
 

From d75741c5d30ad7ca693e78be915e9c31b5074e4f Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 14:17:44 +0000
Subject: [PATCH 17/48] fix #183: Use unique_ptr for all redis reply objects so
 they get cleaned up on exceptions and any scope exit

---
 include/faabric/redis/Redis.h |   5 +-
 src/redis/Redis.cpp           | 280 +++++++++++++++-------------------
 2 files changed, 128 insertions(+), 157 deletions(-)

diff --git a/include/faabric/redis/Redis.h b/include/faabric/redis/Redis.h
index 9a952c4a3..acdb23684 100644
--- a/include/faabric/redis/Redis.h
+++ b/include/faabric/redis/Redis.h
@@ -4,6 +4,7 @@
 #include <faabric/util/exception.h>
 
 #include <hiredis/hiredis.h>
+#include <memory>
 #include <mutex>
 #include <set>
 #include <string>
@@ -64,6 +65,8 @@ return 0
 )---";
 };
 
+using UniqueRedisReply = std::unique_ptr<redisReply, decltype(&freeReplyObject)>;
+
 class Redis
 {
 
@@ -211,7 +214,7 @@ class Redis
 
     const RedisInstance& instance;
 
-    redisReply* dequeueBase(const std::string& queueName, int timeout);
+    UniqueRedisReply dequeueBase(const std::string& queueName, int timeout);
 };
 
 class RedisNoResponseException : public faabric::util::FaabricException
diff --git a/src/redis/Redis.cpp b/src/redis/Redis.cpp
index 4bf6dc301..ac38d5acc 100644
--- a/src/redis/Redis.cpp
+++ b/src/redis/Redis.cpp
@@ -11,6 +11,11 @@
 
 namespace faabric::redis {
 
+UniqueRedisReply wrapReply(redisReply* r)
+{
+    return UniqueRedisReply(r, &freeReplyObject);
+}
+
 RedisInstance::RedisInstance(RedisRole roleIn)
   : role(roleIn)
 {
@@ -108,22 +113,22 @@ Redis& Redis::getQueue()
     return redisQueue;
 }
 
-long getLongFromReply(redisReply* reply)
+long getLongFromReply(const redisReply& reply)
 {
     long res = 0;
 
-    if (reply->str != nullptr) {
-        res = std::stol(reply->str);
+    if (reply.str != nullptr) {
+        res = std::stol(reply.str);
     }
 
     return res;
 }
 
-std::vector<uint8_t> getBytesFromReply(redisReply* reply)
+std::vector<uint8_t> getBytesFromReply(const redisReply& reply)
 {
     // We have to be careful here to handle the bytes properly
-    char* resultArray = reply->str;
-    int resultLen = reply->len;
+    const char* resultArray = reply.str;
+    const size_t resultLen = reply.len;
 
     std::vector<uint8_t> resultData(resultArray, resultArray + resultLen);
 
@@ -131,13 +136,13 @@ std::vector<uint8_t> getBytesFromReply(redisReply* reply)
 }
 
 void getBytesFromReply(const std::string& key,
-                       redisReply* reply,
+                       const redisReply& reply,
                        uint8_t* buffer,
                        size_t bufferLen)
 {
     // We have to be careful here to handle the bytes properly
-    char* resultArray = reply->str;
-    int resultLen = reply->len;
+    const char* resultArray = reply.str;
+    const size_t resultLen = reply.len;
 
     if (resultLen > (int)bufferLen) {
         SPDLOG_ERROR("Value ({}) too big for buffer ({}) - key {}",
@@ -147,21 +152,20 @@ void getBytesFromReply(const std::string& key,
         throw std::runtime_error("Reading value too big for buffer");
     }
 
-    std::copy(resultArray, resultArray + resultLen, buffer);
+    std::copy_n(resultArray, resultLen, buffer);
 }
 
 /**
  *  ------ Lua scripts ------
  */
 
-long extractScriptResult(redisReply* reply)
+long extractScriptResult(const redisReply& reply)
 {
-    if (reply->type == REDIS_REPLY_ERROR) {
-        throw(std::runtime_error(reply->str));
+    if (reply.type == REDIS_REPLY_ERROR) {
+        throw(std::runtime_error(reply.str));
     }
 
-    long result = reply->integer;
-    freeReplyObject(reply);
+    long result = reply.integer;
 
     return result;
 }
@@ -174,12 +178,10 @@ void Redis::ping()
 {
 
     SPDLOG_DEBUG("Pinging redis at {}", instance.hostname);
-    auto reply = (redisReply*)redisCommand(context, "PING");
+    auto reply = wrapReply((redisReply*)redisCommand(context, "PING"));
 
     std::string response(reply->str);
 
-    freeReplyObject(reply);
-
     if (response != "PONG") {
         SPDLOG_DEBUG("Failed pinging redis at {}", instance.hostname);
         throw std::runtime_error("Failed to ping redis host");
@@ -190,34 +192,35 @@ void Redis::ping()
 
 size_t Redis::strlen(const std::string& key)
 {
-    auto reply = (redisReply*)redisCommand(context, "STRLEN %s", key.c_str());
+    auto reply =
+      wrapReply((redisReply*)redisCommand(context, "STRLEN %s", key.c_str()));
 
     size_t result = reply->integer;
-    freeReplyObject(reply);
     return result;
 }
 
 void Redis::get(const std::string& key, uint8_t* buffer, size_t size)
 {
-    auto reply = (redisReply*)redisCommand(context, "GET %s", key.c_str());
+    auto reply =
+      wrapReply((redisReply*)redisCommand(context, "GET %s", key.c_str()));
 
-    getBytesFromReply(key, reply, buffer, size);
-    freeReplyObject(reply);
+    getBytesFromReply(key, *reply, buffer, size);
 }
 
 std::vector<uint8_t> Redis::get(const std::string& key)
 {
-    auto reply = (redisReply*)redisCommand(context, "GET %s", key.c_str());
+    auto reply =
+      wrapReply((redisReply*)redisCommand(context, "GET %s", key.c_str()));
 
-    const std::vector<uint8_t> replyBytes = getBytesFromReply(reply);
-    freeReplyObject(reply);
+    const std::vector<uint8_t> replyBytes = getBytesFromReply(*reply);
 
     return replyBytes;
 }
 
 long Redis::getCounter(const std::string& key)
 {
-    auto reply = (redisReply*)redisCommand(context, "GET %s", key.c_str());
+    auto reply =
+      wrapReply((redisReply*)redisCommand(context, "GET %s", key.c_str()));
 
     if (reply == nullptr || reply->type == REDIS_REPLY_NIL || reply->len == 0) {
         return 0;
@@ -228,19 +231,18 @@ long Redis::getCounter(const std::string& key)
 
 long Redis::incr(const std::string& key)
 {
-    auto reply = (redisReply*)redisCommand(context, "INCR %s", key.c_str());
+    auto reply =
+      wrapReply((redisReply*)redisCommand(context, "INCR %s", key.c_str()));
 
     long result = reply->integer;
-
-    freeReplyObject(reply);
     return result;
 }
 
 long Redis::decr(const std::string& key)
 {
-    auto reply = (redisReply*)redisCommand(context, "DECR %s", key.c_str());
+    auto reply =
+      wrapReply((redisReply*)redisCommand(context, "DECR %s", key.c_str()));
     long result = reply->integer;
-    freeReplyObject(reply);
 
     return result;
 }
@@ -249,12 +251,11 @@ long Redis::incrByLong(const std::string& key, long val)
 {
     // Format is NOT printf compatible contrary to what the docs say, hence %i
     // instead of %l.
-    auto reply =
-      (redisReply*)redisCommand(context, "INCRBY %s %i", key.c_str(), val);
+    auto reply = wrapReply(
+      (redisReply*)redisCommand(context, "INCRBY %s %i", key.c_str(), val));
 
     long result = reply->integer;
 
-    freeReplyObject(reply);
     return result;
 }
 
@@ -262,12 +263,11 @@ long Redis::decrByLong(const std::string& key, long val)
 {
     // Format is NOT printf compatible contrary to what the docs say, hence %i
     // instead of %l.
-    auto reply =
-      (redisReply*)redisCommand(context, "DECRBY %s %i", key.c_str(), val);
+    auto reply = wrapReply(
+      (redisReply*)redisCommand(context, "DECRBY %s %i", key.c_str(), val));
 
     long result = reply->integer;
 
-    freeReplyObject(reply);
     return result;
 }
 
@@ -278,20 +278,18 @@ void Redis::set(const std::string& key, const std::vector<uint8_t>& value)
 
 void Redis::set(const std::string& key, const uint8_t* value, size_t size)
 {
-    auto reply =
-      (redisReply*)redisCommand(context, "SET %s %b", key.c_str(), value, size);
+    auto reply = wrapReply((redisReply*)redisCommand(
+      context, "SET %s %b", key.c_str(), value, size));
 
     if (reply->type == REDIS_REPLY_ERROR) {
         SPDLOG_ERROR("Failed to SET {} - {}", key.c_str(), reply->str);
     }
-
-    freeReplyObject(reply);
 }
 
 void Redis::del(const std::string& key)
 {
-    auto reply = (redisReply*)redisCommand(context, "DEL %s", key.c_str());
-    freeReplyObject(reply);
+    auto reply =
+      wrapReply((redisReply*)redisCommand(context, "DEL %s", key.c_str()));
 }
 
 void Redis::setRange(const std::string& key,
@@ -299,15 +297,13 @@ void Redis::setRange(const std::string& key,
                      const uint8_t* value,
                      size_t size)
 {
-    auto reply = (redisReply*)redisCommand(
-      context, "SETRANGE %s %li %b", key.c_str(), offset, value, size);
+    auto reply = wrapReply((redisReply*)redisCommand(
+      context, "SETRANGE %s %li %b", key.c_str(), offset, value, size));
 
     if (reply->type != REDIS_REPLY_INTEGER) {
         SPDLOG_ERROR("Failed SETRANGE {}", key);
         throw std::runtime_error("Failed SETRANGE " + key);
     }
-
-    freeReplyObject(reply);
 }
 
 void Redis::setRangePipeline(const std::string& key,
@@ -324,6 +320,7 @@ void Redis::flushPipeline(long pipelineLength)
     void* reply;
     for (long p = 0; p < pipelineLength; p++) {
         redisGetReply(context, &reply);
+        auto _replyGuard = wrapReply((redisReply*)reply);
 
         if (reply == nullptr ||
             ((redisReply*)reply)->type == REDIS_REPLY_ERROR) {
@@ -331,73 +328,63 @@ void Redis::flushPipeline(long pipelineLength)
             throw std::runtime_error("Failed pipeline call " +
                                      std::to_string(p));
         }
-
-        freeReplyObject(reply);
     }
 }
 
 void Redis::sadd(const std::string& key, const std::string& value)
 {
-    auto reply = (redisReply*)redisCommand(
-      context, "SADD %s %s", key.c_str(), value.c_str());
+    auto reply = wrapReply((redisReply*)redisCommand(
+      context, "SADD %s %s", key.c_str(), value.c_str()));
     if (reply->type == REDIS_REPLY_ERROR) {
         SPDLOG_ERROR("Failed to add {} to set {}", value, key);
         throw std::runtime_error("Failed to add element to set");
     }
-
-    freeReplyObject(reply);
 }
 
 void Redis::srem(const std::string& key, const std::string& value)
 {
-    auto reply = (redisReply*)redisCommand(
-      context, "SREM %s %s", key.c_str(), value.c_str());
-    freeReplyObject(reply);
+    auto reply = wrapReply((redisReply*)redisCommand(
+      context, "SREM %s %s", key.c_str(), value.c_str()));
 }
 
 long Redis::scard(const std::string& key)
 {
-    auto reply = (redisReply*)redisCommand(context, "SCARD %s", key.c_str());
+    auto reply =
+      wrapReply((redisReply*)redisCommand(context, "SCARD %s", key.c_str()));
 
     long res = reply->integer;
 
-    freeReplyObject(reply);
-
     return res;
 }
 
 bool Redis::sismember(const std::string& key, const std::string& value)
 {
-    auto reply = (redisReply*)redisCommand(
-      context, "SISMEMBER %s %s", key.c_str(), value.c_str());
+    auto reply = wrapReply((redisReply*)redisCommand(
+      context, "SISMEMBER %s %s", key.c_str(), value.c_str()));
 
     bool res = reply->integer == 1;
 
-    freeReplyObject(reply);
-
     return res;
 }
 
 std::string Redis::srandmember(const std::string& key)
 {
-    auto reply =
-      (redisReply*)redisCommand(context, "SRANDMEMBER %s", key.c_str());
+    auto reply = wrapReply(
+      (redisReply*)redisCommand(context, "SRANDMEMBER %s", key.c_str()));
 
     std::string res;
     if (reply->len > 0) {
         res = reply->str;
     }
 
-    freeReplyObject(reply);
-
     return res;
 }
 
-std::set<std::string> extractStringSetFromReply(redisReply* reply)
+std::set<std::string> extractStringSetFromReply(const redisReply& reply)
 {
     std::set<std::string> retValue;
-    for (size_t i = 0; i < reply->elements; i++) {
-        retValue.insert(reply->element[i]->str);
+    for (size_t i = 0; i < reply.elements; i++) {
+        retValue.insert(reply.element[i]->str);
     }
 
     return retValue;
@@ -405,90 +392,83 @@ std::set<std::string> extractStringSetFromReply(redisReply* reply)
 
 std::set<std::string> Redis::smembers(const std::string& key)
 {
-    auto reply = (redisReply*)redisCommand(context, "SMEMBERS %s", key.c_str());
-    std::set<std::string> result = extractStringSetFromReply(reply);
+    auto reply =
+      wrapReply((redisReply*)redisCommand(context, "SMEMBERS %s", key.c_str()));
+    std::set<std::string> result = extractStringSetFromReply(*reply);
 
-    freeReplyObject(reply);
     return result;
 }
 
 std::set<std::string> Redis::sinter(const std::string& keyA,
                                     const std::string& keyB)
 {
-    auto reply = (redisReply*)redisCommand(
-      context, "SINTER %s %s", keyA.c_str(), keyB.c_str());
-    std::set<std::string> result = extractStringSetFromReply(reply);
+    auto reply = wrapReply((redisReply*)redisCommand(
+      context, "SINTER %s %s", keyA.c_str(), keyB.c_str()));
+    std::set<std::string> result = extractStringSetFromReply(*reply);
 
-    freeReplyObject(reply);
     return result;
 }
 
 std::set<std::string> Redis::sdiff(const std::string& keyA,
                                    const std::string& keyB)
 {
-    auto reply = (redisReply*)redisCommand(
-      context, "SDIFF %s %s", keyA.c_str(), keyB.c_str());
-    std::set<std::string> result = extractStringSetFromReply(reply);
+    auto reply = wrapReply((redisReply*)redisCommand(
+      context, "SDIFF %s %s", keyA.c_str(), keyB.c_str()));
+    std::set<std::string> result = extractStringSetFromReply(*reply);
 
-    freeReplyObject(reply);
     return result;
 }
 
 int Redis::lpushLong(const std::string& key, long value)
 {
-    auto reply =
-      (redisReply*)redisCommand(context, "LPUSH %s %i", key.c_str(), value);
+    auto reply = wrapReply(
+      (redisReply*)redisCommand(context, "LPUSH %s %i", key.c_str(), value));
     long long int result = reply->integer;
 
-    freeReplyObject(reply);
     return result;
 }
 
 int Redis::rpushLong(const std::string& key, long value)
 {
-    auto reply =
-      (redisReply*)redisCommand(context, "RPUSH %s %i", key.c_str(), value);
+    auto reply = wrapReply(
+      (redisReply*)redisCommand(context, "RPUSH %s %i", key.c_str(), value));
     long long int result = reply->integer;
-    freeReplyObject(reply);
     return result;
 }
 
 void Redis::flushAll()
 {
-    auto reply = (redisReply*)redisCommand(context, "FLUSHALL");
-    freeReplyObject(reply);
+    auto reply = wrapReply((redisReply*)redisCommand(context, "FLUSHALL"));
 }
 
 long Redis::listLength(const std::string& queueName)
 {
-    auto reply =
-      (redisReply*)redisCommand(context, "LLEN %s", queueName.c_str());
+    auto reply = wrapReply(
+      (redisReply*)redisCommand(context, "LLEN %s", queueName.c_str()));
 
     if (reply == nullptr || reply->type == REDIS_REPLY_NIL) {
         return 0;
     }
 
     long result = reply->integer;
-    freeReplyObject(reply);
 
     return result;
 }
 
 long Redis::getTtl(const std::string& key)
 {
-    auto reply = (redisReply*)redisCommand(context, "TTL %s", key.c_str());
+    auto reply =
+      wrapReply((redisReply*)redisCommand(context, "TTL %s", key.c_str()));
 
     long ttl = reply->integer;
-    freeReplyObject(reply);
 
     return ttl;
 }
 
 void Redis::expire(const std::string& key, long expiry)
 {
-    auto reply =
-      (redisReply*)redisCommand(context, "EXPIRE %s %d", key.c_str(), expiry);
-    freeReplyObject(reply);
+    auto reply = wrapReply(
+      (redisReply*)redisCommand(context, "EXPIRE %s %d", key.c_str(), expiry));
 }
 
 void Redis::refresh()
@@ -512,13 +492,12 @@ void Redis::getRange(const std::string& key,
           " too long for buffer length " + std::to_string(bufferLen));
     }
 
-    auto reply = (redisReply*)redisCommand(
-      context, "GETRANGE %s %li %li", key.c_str(), start, end);
+    auto reply = wrapReply((redisReply*)redisCommand(
+      context, "GETRANGE %s %li %li", key.c_str(), start, end));
 
     // Importantly getrange is inclusive so we need to be checking the buffer
     // length
-    getBytesFromReply(key, reply, buffer, bufferLen);
-    freeReplyObject(reply);
+    getBytesFromReply(key, *reply, buffer, bufferLen);
 }
 
 /**
@@ -550,13 +529,14 @@ void Redis::releaseLock(const std::string& key, uint32_t lockId)
 void Redis::delIfEq(const std::string& key, uint32_t value)
 {
     // Invoke the script
-    auto reply = (redisReply*)redisCommand(context,
-                                           "EVALSHA %s 1 %s %i",
-                                           instance.delifeqSha.c_str(),
-                                           key.c_str(),
-                                           value);
+    auto reply =
+      wrapReply((redisReply*)redisCommand(context,
+                                          "EVALSHA %s 1 %s %i",
+                                          instance.delifeqSha.c_str(),
+                                          key.c_str(),
+                                          value));
 
-    extractScriptResult(reply);
+    extractScriptResult(*reply);
 }
 
 bool Redis::setnxex(const std::string& key, long value, int expirySeconds)
@@ -565,8 +545,8 @@ bool Redis::setnxex(const std::string& key, long value, int expirySeconds)
     // We use NX to say "set if not exists" and ex to specify the expiry of this
     // key/value This is useful in implementing locks. We only use longs as
     // values to keep things simple
-    auto reply = (redisReply*)redisCommand(
-      context, "SET %s %i EX %i NX", key.c_str(), value, expirySeconds);
+    auto reply = wrapReply((redisReply*)redisCommand(
+      context, "SET %s %i EX %i NX", key.c_str(), value, expirySeconds));
 
     bool success = false;
     if (reply->type == REDIS_REPLY_ERROR) {
@@ -575,17 +555,15 @@ bool Redis::setnxex(const std::string& key, long value, int expirySeconds)
         success = true;
     }
 
-    freeReplyObject(reply);
-
     return success;
 }
 
 long Redis::getLong(const std::string& key)
 {
-    auto reply = (redisReply*)redisCommand(context, "GET %s", key.c_str());
+    auto reply =
+      wrapReply((redisReply*)redisCommand(context, "GET %s", key.c_str()));
 
-    long res = getLongFromReply(reply);
-    freeReplyObject(reply);
+    long res = getLongFromReply(*reply);
 
     return res;
 }
@@ -594,10 +572,8 @@ void Redis::setLong(const std::string& key, long value)
 {
     // Format is NOT printf compatible contrary to what the docs say, hence %i
     // instead of %l.
-    auto reply =
-      (redisReply*)redisCommand(context, "SET %s %i", key.c_str(), value);
-
-    freeReplyObject(reply);
+    auto reply = wrapReply(
+      (redisReply*)redisCommand(context, "SET %s %i", key.c_str(), value));
 }
 
 /**
@@ -606,9 +582,8 @@ void Redis::setLong(const std::string& key, long value)
 
 void Redis::enqueue(const std::string& queueName, const std::string& value)
 {
-    auto reply = (redisReply*)redisCommand(
-      context, "RPUSH %s %s", queueName.c_str(), value.c_str());
-    freeReplyObject(reply);
+    auto reply = wrapReply((redisReply*)redisCommand(
+      context, "RPUSH %s %s", queueName.c_str(), value.c_str()));
 }
 
 void Redis::enqueueBytes(const std::string& queueName,
@@ -638,24 +613,24 @@ void Redis::enqueueBytes(const std::string& queueName,
     freeReplyObject(reply);
 }
 
-redisReply* Redis::dequeueBase(const std::string& queueName, int timeoutMs)
+UniqueRedisReply Redis::dequeueBase(const std::string& queueName, int timeoutMs)
 {
     // NOTE - we contradict the default redis behaviour here by doing a
     // non-blocking pop when timeout is zero (rather than infinite as in Redis)
     bool isBlocking = timeoutMs > 0;
 
-    redisReply* reply;
+    UniqueRedisReply reply{ nullptr, &freeReplyObject };
     if (isBlocking) {
         // Note, timeouts need to be converted into seconds
         // Floor to one second
         int timeoutSecs = std::max(timeoutMs / 1000, 1);
 
-        reply = (redisReply*)redisCommand(
-          context, "BLPOP %s %d", queueName.c_str(), timeoutSecs);
+        reply = wrapReply((redisReply*)redisCommand(
+          context, "BLPOP %s %d", queueName.c_str(), timeoutSecs));
     } else {
         // LPOP is non-blocking
-        reply =
-          (redisReply*)redisCommand(context, "LPOP %s", queueName.c_str());
+        reply = wrapReply(
+          (redisReply*)redisCommand(context, "LPOP %s", queueName.c_str()));
     }
 
     // Check if we got anything
@@ -692,7 +667,7 @@ redisReply* Redis::dequeueBase(const std::string& queueName, int timeoutMs)
 std::string Redis::dequeue(const std::string& queueName, int timeoutMs)
 {
     bool isBlocking = timeoutMs > 0;
-    redisReply* reply = this->dequeueBase(queueName, timeoutMs);
+    auto reply = this->dequeueBase(queueName, timeoutMs);
 
     std::string result;
     if (isBlocking) {
@@ -702,8 +677,6 @@ std::string Redis::dequeue(const std::string& queueName, int timeoutMs)
         result = reply->str;
     }
 
-    freeReplyObject(reply);
-
     return result;
 }
 
@@ -713,8 +686,8 @@ void Redis::dequeueMultiple(const std::string& queueName,
                             long nElems)
 {
     // NOTE - much like other range stuff with redis, this is *INCLUSIVE*
-    auto reply = (redisReply*)redisCommand(
-      context, "LRANGE %s 0 %i", queueName.c_str(), nElems - 1);
+    auto reply = wrapReply((redisReply*)redisCommand(
+      context, "LRANGE %s 0 %i", queueName.c_str(), nElems - 1));
 
     long offset = 0;
     for (size_t i = 0; i < reply->elements; i++) {
@@ -728,28 +701,23 @@ void Redis::dequeueMultiple(const std::string& queueName,
                                  std::to_string(offset) + " buffer " +
                                  std::to_string(buffLen) + ")");
     }
-
-    freeReplyObject(reply);
 }
 
 std::vector<uint8_t> Redis::dequeueBytes(const std::string& queueName,
                                          int timeoutMs)
 {
     bool isBlocking = timeoutMs > 0;
-    redisReply* reply = this->dequeueBase(queueName, timeoutMs);
+    auto reply = this->dequeueBase(queueName, timeoutMs);
 
     std::vector<uint8_t> replyBytes;
     if (isBlocking) {
         // BLPOP will return the queue name and the value returned (elements
         // 0 and 1)
-        redisReply* r = reply->element[1];
-        replyBytes = getBytesFromReply(r);
+        replyBytes = getBytesFromReply(*reply->element[1]);
     } else {
-        replyBytes = getBytesFromReply(reply);
+        replyBytes = getBytesFromReply(*reply);
     }
 
-    freeReplyObject(reply);
-
     return replyBytes;
 }
 
@@ -759,7 +727,8 @@ void Redis::dequeueBytes(const std::string& queueName,
                          int timeoutMs)
 {
     bool isBlocking = timeoutMs > 0;
-    redisReply* reply = this->dequeueBase(queueName, timeoutMs);
+    auto replyOwned = this->dequeueBase(queueName, timeoutMs);
+    auto reply = replyOwned.get();
 
     if (isBlocking) {
         reply = reply->element[1];
@@ -776,25 +745,24 @@ void Redis::dequeueBytes(const std::string& queueName,
     }
 
     memcpy(buffer, resultBytes, resultLen);
-
-    freeReplyObject(reply);
 }
 
 void Redis::publishSchedulerResult(const std::string& key,
                                    const std::string& status_key,
                                    const std::vector<uint8_t>& result)
 {
-    auto reply = (redisReply*)redisCommand(context,
-                                           "EVALSHA %s 2 %s %s %b %d %d",
-                                           instance.schedPublishSha.c_str(),
-                                           // keys
-                                           key.c_str(),
-                                           status_key.c_str(),
-                                           // argv
-                                           result.data(),
-                                           result.size(),
-                                           RESULT_KEY_EXPIRY,
-                                           STATUS_KEY_EXPIRY);
-    extractScriptResult(reply);
+    auto reply =
+      wrapReply((redisReply*)redisCommand(context,
+                                          "EVALSHA %s 2 %s %s %b %d %d",
+                                          instance.schedPublishSha.c_str(),
+                                          // keys
+                                          key.c_str(),
+                                          status_key.c_str(),
+                                          // argv
+                                          result.data(),
+                                          result.size(),
+                                          RESULT_KEY_EXPIRY,
+                                          STATUS_KEY_EXPIRY));
+    extractScriptResult(*reply);
 }
 }

From f43fa32d1ae3a672ee801ca0c664ff59fbd06b2d Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 14:25:45 +0000
Subject: [PATCH 18/48] ASan/leak: use unique_ptr for pulled and dirtyMask to
 free the arrays automatically in the destructor

---
 include/faabric/state/StateKeyValue.h |  4 ++--
 src/state/StateKeyValue.cpp           | 16 ++++++++--------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/faabric/state/StateKeyValue.h b/include/faabric/state/StateKeyValue.h
index 2edf3d218..7274f00b7 100644
--- a/include/faabric/state/StateKeyValue.h
+++ b/include/faabric/state/StateKeyValue.h
@@ -142,8 +142,8 @@ class StateKeyValue
     // Flags for tracking allocation and initial pull
     std::atomic<bool> fullyAllocated = false;
     std::atomic<bool> fullyPulled = false;
-    void* pulledMask = nullptr;
-    void* dirtyMask = nullptr;
+    std::unique_ptr<uint8_t[]> pulledMask = nullptr;
+    std::unique_ptr<uint8_t[]> dirtyMask = nullptr;
     bool isDirty = false;
 
     void zeroDirtyMask();
diff --git a/src/state/StateKeyValue.cpp b/src/state/StateKeyValue.cpp
index 6f7c223c9..77ebd3246 100644
--- a/src/state/StateKeyValue.cpp
+++ b/src/state/StateKeyValue.cpp
@@ -43,11 +43,11 @@ void StateKeyValue::configureSize()
     sharedMemSize = nHostPages * HOST_PAGE_SIZE;
     sharedMemory = nullptr;
 
-    dirtyMask = new uint8_t[valueSize];
+    dirtyMask = std::make_unique<uint8_t[]>(valueSize);
     zeroDirtyMask();
 
-    pulledMask = new uint8_t[valueSize];
-    ::memset(pulledMask, 0, valueSize);
+    pulledMask = std::make_unique<uint8_t[]>(valueSize);
+    ::memset(pulledMask.get(), 0, valueSize);
 }
 
 void StateKeyValue::checkSizeConfigured()
@@ -73,7 +73,7 @@ bool StateKeyValue::isChunkPulled(long offset, size_t length)
     }
 
     // TODO - more efficient way of checking this
-    auto pulledMaskBytes = BYTES(pulledMask);
+    auto pulledMaskBytes = pulledMask.get();
     for (size_t i = 0; i < length; i++) {
         if (pulledMaskBytes[offset + i] == 0) {
             return false;
@@ -222,7 +222,7 @@ void StateKeyValue::flagDirty()
 void StateKeyValue::zeroDirtyMask()
 {
     checkSizeConfigured();
-    memset(dirtyMask, 0, valueSize);
+    memset(dirtyMask.get(), 0, valueSize);
 }
 
 void StateKeyValue::flagChunkDirty(long offset, long len)
@@ -236,7 +236,7 @@ void StateKeyValue::flagChunkDirty(long offset, long len)
 void StateKeyValue::markDirtyChunk(long offset, long len)
 {
     isDirty |= true;
-    memset(BYTES(dirtyMask) + offset, ONES_BITMASK, len);
+    memset(dirtyMask.get() + offset, ONES_BITMASK, len);
 }
 
 size_t StateKeyValue::size() const
@@ -410,7 +410,7 @@ void StateKeyValue::pushPartial()
 {
     checkSizeConfigured();
 
-    auto dirtyMaskBytes = BYTES(dirtyMask);
+    auto dirtyMaskBytes = dirtyMask.get();
     doPushPartial(dirtyMaskBytes);
 }
 
@@ -504,7 +504,7 @@ void StateKeyValue::doPullChunk(bool lazy, long offset, size_t length)
     pullChunkFromRemote(offset, length);
 
     // Mark the chunk as pulled
-    memset(BYTES(pulledMask) + offset, ONES_BITMASK, length);
+    memset(pulledMask.get() + offset, ONES_BITMASK, length);
 }
 
 void StateKeyValue::doPushPartial(const uint8_t* dirtyMaskBytes)

From bbc1db2164794119b00f1b9585309a9df19be55a Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 14:30:23 +0000
Subject: [PATCH 19/48] Change the BYTES macro to a template to catch potential
 type errors

---
 include/faabric/util/macros.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/include/faabric/util/macros.h b/include/faabric/util/macros.h
index 504e4be40..c27be2ab5 100644
--- a/include/faabric/util/macros.h
+++ b/include/faabric/util/macros.h
@@ -2,8 +2,17 @@
 
 #include <unistd.h>
 
-#define BYTES(arr) reinterpret_cast<uint8_t*>(arr)
-#define BYTES_CONST(arr) reinterpret_cast<const uint8_t*>(arr)
+template<class T>
+uint8_t* BYTES(T* arr)
+{
+    return reinterpret_cast<uint8_t*>(arr);
+}
+
+template<class T>
+const uint8_t* BYTES_CONST(const T* arr)
+{
+    return reinterpret_cast<const uint8_t*>(arr);
+}
 
 #define SLEEP_MS(ms) usleep((ms)*1000)
 

From 04bdc9b0d91d83cd42cca285219778628e5e95d3 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 14:47:46 +0000
Subject: [PATCH 20/48] Replace all other uses of new/delete with
 unique_ptr/vector. Also minor bugfix to the redis api, dequeueBytes did not
 return the actual number of bytes read - fixed that and the corresponding
 test.

---
 include/faabric/redis/Redis.h                 | 11 +++++----
 src/redis/Redis.cpp                           | 13 +++++-----
 src/scheduler/MpiWorld.cpp                    | 18 +++++++-------
 src/state/State.cpp                           | 18 +++++++-------
 tests/test/redis/test_redis.cpp               |  6 ++---
 tests/test/scheduler/test_executor.cpp        |  4 ++--
 tests/test/scheduler/test_mpi_exec_graph.cpp  | 12 ++++++----
 tests/test/scheduler/test_mpi_world.cpp       | 12 ++++++----
 .../test/scheduler/test_remote_mpi_worlds.cpp | 24 ++++++++++++-------
 tests/test/scheduler/test_scheduler.cpp       |  8 +++++--
 10 files changed, 74 insertions(+), 52 deletions(-)

diff --git a/include/faabric/redis/Redis.h b/include/faabric/redis/Redis.h
index acdb23684..b7df443cb 100644
--- a/include/faabric/redis/Redis.h
+++ b/include/faabric/redis/Redis.h
@@ -65,7 +65,8 @@ return 0
 )---";
 };
 
-using UniqueRedisReply = std::unique_ptr<redisReply, decltype(&freeReplyObject)>;
+using UniqueRedisReply =
+  std::unique_ptr<redisReply, decltype(&freeReplyObject)>;
 
 class Redis
 {
@@ -192,10 +193,10 @@ class Redis
     std::vector<uint8_t> dequeueBytes(const std::string& queueName,
                                       int timeout = DEFAULT_TIMEOUT);
 
-    void dequeueBytes(const std::string& queueName,
-                      uint8_t* buffer,
-                      size_t bufferLen,
-                      int timeout = DEFAULT_TIMEOUT);
+    size_t dequeueBytes(const std::string& queueName,
+                        uint8_t* buffer,
+                        size_t bufferLen,
+                        int timeout = DEFAULT_TIMEOUT);
 
     void dequeueMultiple(const std::string& queueName,
                          uint8_t* buff,
diff --git a/src/redis/Redis.cpp b/src/redis/Redis.cpp
index ac38d5acc..de7fe81ad 100644
--- a/src/redis/Redis.cpp
+++ b/src/redis/Redis.cpp
@@ -721,10 +721,10 @@ std::vector<uint8_t> Redis::dequeueBytes(const std::string& queueName,
     return replyBytes;
 }
 
-void Redis::dequeueBytes(const std::string& queueName,
-                         uint8_t* buffer,
-                         size_t bufferLen,
-                         int timeoutMs)
+size_t Redis::dequeueBytes(const std::string& queueName,
+                           uint8_t* buffer,
+                           size_t bufferLen,
+                           int timeoutMs)
 {
     bool isBlocking = timeoutMs > 0;
     auto replyOwned = this->dequeueBase(queueName, timeoutMs);
@@ -735,9 +735,9 @@ void Redis::dequeueBytes(const std::string& queueName,
     }
 
     auto resultBytes = (uint8_t*)reply->str;
-    int resultLen = reply->len;
+    size_t resultLen = reply->len;
 
-    if (resultLen > (int)bufferLen) {
+    if (resultLen > bufferLen) {
         throw std::runtime_error(
           "Buffer not long enough for dequeue result (buffer=" +
           std::to_string(bufferLen) + " len=" + std::to_string(resultLen) +
@@ -745,6 +745,7 @@ void Redis::dequeueBytes(const std::string& queueName,
     }
 
     memcpy(buffer, resultBytes, resultLen);
+    return resultLen;
 }
 
 void Redis::publishSchedulerResult(const std::string& key,
diff --git a/src/scheduler/MpiWorld.cpp b/src/scheduler/MpiWorld.cpp
index b017a00e4..52e76ee4f 100644
--- a/src/scheduler/MpiWorld.cpp
+++ b/src/scheduler/MpiWorld.cpp
@@ -966,25 +966,24 @@ void MpiWorld::reduce(int sendRank,
             memcpy(recvBuffer, sendBuffer, bufferSize);
         }
 
-        uint8_t* rankData = new uint8_t[bufferSize];
+        auto rankData = std::make_unique<uint8_t[]>(bufferSize);
         for (int r = 0; r < size; r++) {
             // Work out the data for this rank
-            memset(rankData, 0, bufferSize);
+            memset(rankData.get(), 0, bufferSize);
             if (r != recvRank) {
                 recv(r,
                      recvRank,
-                     rankData,
+                     rankData.get(),
                      datatype,
                      count,
                      nullptr,
                      faabric::MPIMessage::REDUCE);
 
-                op_reduce(operation, datatype, count, rankData, recvBuffer);
+                op_reduce(
+                  operation, datatype, count, rankData.get(), recvBuffer);
             }
         }
 
-        delete[] rankData;
-
     } else {
         // Do the sending
         send(sendRank,
@@ -1152,17 +1151,16 @@ void MpiWorld::scan(int rank,
 
     if (rank > 0) {
         // Receive the current accumulated value
-        auto currentAcc = new uint8_t[bufferSize];
+        auto currentAcc = std::make_unique<uint8_t[]>(bufferSize);
         recv(rank - 1,
              rank,
-             currentAcc,
+             currentAcc.get(),
              datatype,
              count,
              nullptr,
              faabric::MPIMessage::SCAN);
         // Reduce with our own value
-        op_reduce(operation, datatype, count, currentAcc, recvBuffer);
-        delete[] currentAcc;
+        op_reduce(operation, datatype, count, currentAcc.get(), recvBuffer);
     }
 
     // If not the last process, send to the next one
diff --git a/src/state/State.cpp b/src/state/State.cpp
index 63ad7b4c5..c522ac758 100644
--- a/src/state/State.cpp
+++ b/src/state/State.cpp
@@ -147,20 +147,22 @@ std::shared_ptr<StateKeyValue> State::doGetKV(const std::string& user,
     std::string stateMode = faabric::util::getSystemConfig().stateMode;
     if (stateMode == "redis") {
         if (sizeless) {
-            auto kv = new RedisStateKeyValue(user, key);
-            kvMap.emplace(lookupKey, kv);
+            auto kv = std::make_shared<RedisStateKeyValue>(user, key);
+            kvMap.emplace(lookupKey, std::move(kv));
         } else {
-            auto kv = new RedisStateKeyValue(user, key, size);
-            kvMap.emplace(lookupKey, kv);
+            auto kv = std::make_shared<RedisStateKeyValue>(user, key, size);
+            kvMap.emplace(lookupKey, std::move(kv));
         }
     } else if (stateMode == "inmemory") {
         // NOTE - passing IP here is crucial for testing
         if (sizeless) {
-            auto kv = new InMemoryStateKeyValue(user, key, thisIP);
-            kvMap.emplace(lookupKey, kv);
+            auto kv =
+              std::make_shared<InMemoryStateKeyValue>(user, key, thisIP);
+            kvMap.emplace(lookupKey, std::move(kv));
         } else {
-            auto kv = new InMemoryStateKeyValue(user, key, size, thisIP);
-            kvMap.emplace(lookupKey, kv);
+            auto kv =
+              std::make_shared<InMemoryStateKeyValue>(user, key, size, thisIP);
+            kvMap.emplace(lookupKey, std::move(kv));
         }
     } else {
         throw std::runtime_error("Unrecognised state mode: " + stateMode);
diff --git a/tests/test/redis/test_redis.cpp b/tests/test/redis/test_redis.cpp
index b5455c096..34d7c4d60 100644
--- a/tests/test/redis/test_redis.cpp
+++ b/tests/test/redis/test_redis.cpp
@@ -665,11 +665,11 @@ void checkDequeueBytes(Redis& redis,
                        const std::vector<uint8_t> expected)
 {
     unsigned long bufferLen = expected.size();
-    auto buffer = new uint8_t[bufferLen];
+    auto buffer = std::vector<uint8_t>(bufferLen, 0);
 
-    redis.dequeueBytes(queueName, buffer, bufferLen);
+    size_t len = redis.dequeueBytes(queueName, buffer.data(), buffer.size());
 
-    std::vector<uint8_t> actual(buffer, buffer + bufferLen);
+    std::vector<uint8_t> actual(buffer.cbegin(), buffer.cbegin() + len);
     REQUIRE(actual == expected);
 }
 
diff --git a/tests/test/scheduler/test_executor.cpp b/tests/test/scheduler/test_executor.cpp
index 89aa2c753..bf703655b 100644
--- a/tests/test/scheduler/test_executor.cpp
+++ b/tests/test/scheduler/test_executor.cpp
@@ -120,7 +120,8 @@ class TestExecutor final : public Executor
             faabric::snapshot::SnapshotRegistry& reg =
               faabric::snapshot::getSnapshotRegistry();
             faabric::util::SnapshotData snap;
-            snap.data = new uint8_t[10];
+            auto snapDataAllocation = std::make_unique<uint8_t[]>(10);
+            snap.data = snapDataAllocation.get();
             snap.size = 10;
 
             reg.takeSnapshot(snapKey, snap);
@@ -150,7 +151,6 @@ class TestExecutor final : public Executor
             }
 
             // Delete the snapshot
-            delete[] snap.data;
             reg.deleteSnapshot(snapKey);
 
             SPDLOG_TRACE("TestExecutor got {} thread results",
diff --git a/tests/test/scheduler/test_mpi_exec_graph.cpp b/tests/test/scheduler/test_mpi_exec_graph.cpp
index 513df0e71..20ebb86dc 100644
--- a/tests/test/scheduler/test_mpi_exec_graph.cpp
+++ b/tests/test/scheduler/test_mpi_exec_graph.cpp
@@ -20,7 +20,8 @@ TEST_CASE_METHOD(MpiTestFixture,
     MPI_Status status{};
 
     std::vector<int> messageData = { 0, 1, 2 };
-    auto buffer = new int[messageData.size()];
+    auto bufferAllocation = std::make_unique<int[]>(messageData.size());
+    auto buffer = bufferAllocation.get();
 
     int numToSend = 10;
     std::string expectedKey =
@@ -55,7 +56,8 @@ TEST_CASE_METHOD(MpiTestFixture,
     MPI_Status status{};
 
     std::vector<int> messageData = { 0, 1, 2 };
-    auto buffer = new int[messageData.size()];
+    auto bufferAllocation = std::make_unique<int[]>(messageData.size());
+    auto buffer = bufferAllocation.get();
 
     int numToSend = 10;
     std::string expectedKey =
@@ -99,7 +101,8 @@ TEST_CASE_METHOD(MpiBaseTestFixture,
       faabric::scheduler::getMpiWorldRegistry().createWorld(msg, worldId);
 
     std::vector<int> messageData = { 0, 1, 2 };
-    auto buffer = new int[messageData.size()];
+    auto bufferAllocation = std::make_unique<int[]>(messageData.size());
+    auto buffer = bufferAllocation.get();
     std::thread otherWorldThread([&messageData, &otherMsg, rank, otherRank] {
         faabric::scheduler::MpiWorld& otherWorld =
           faabric::scheduler::getMpiWorldRegistry().getOrInitialiseWorld(
@@ -142,7 +145,8 @@ TEST_CASE_METHOD(MpiTestFixture,
     MPI_Status status{};
 
     std::vector<int> messageData = { 0, 1, 2 };
-    auto buffer = new int[messageData.size()];
+    auto bufferAllocation = std::make_unique<int[]>(messageData.size());
+    auto buffer = bufferAllocation.get();
 
     std::string expectedKey;
     int msgCount;
diff --git a/tests/test/scheduler/test_mpi_world.cpp b/tests/test/scheduler/test_mpi_world.cpp
index b250d5130..70f938528 100644
--- a/tests/test/scheduler/test_mpi_world.cpp
+++ b/tests/test/scheduler/test_mpi_world.cpp
@@ -241,7 +241,8 @@ TEST_CASE_METHOD(MpiTestFixture, "Test send and recv on same host", "[mpi]")
     {
         // Receive the message
         MPI_Status status{};
-        auto buffer = new int[messageData.size()];
+        auto bufferAllocation = std::make_unique<int[]>(messageData.size());
+        auto buffer = bufferAllocation.get();
         world.recv(
           rankA1, rankA2, BYTES(buffer), MPI_INT, messageData.size(), &status);
 
@@ -418,7 +419,8 @@ TEST_CASE_METHOD(MpiTestFixture, "Test recv with partial data", "[mpi]")
     // Request to receive more values than were sent
     MPI_Status status{};
     unsigned long requestedSize = actualSize + 5;
-    auto buffer = new int[requestedSize];
+    auto bufferAllocation = std::make_unique<int[]>(requestedSize);
+    auto buffer = bufferAllocation.get();
     world.recv(1, 2, BYTES(buffer), MPI_INT, requestedSize, &status);
 
     // Check status reports only the values that were sent
@@ -453,7 +455,8 @@ TEST_CASE_METHOD(MpiTestFixture, "Test probe", "[mpi]")
     REQUIRE(statusA2.bytesSize == sizeA * sizeof(int));
 
     // Receive the message
-    auto bufferA = new int[sizeA];
+    auto bufferAAllocation = std::make_unique<int[]>(sizeA);
+    auto bufferA = bufferAAllocation.get();
     world.recv(1, 2, BYTES(bufferA), MPI_INT, sizeA * sizeof(int), nullptr);
 
     // Probe the next message
@@ -463,7 +466,8 @@ TEST_CASE_METHOD(MpiTestFixture, "Test probe", "[mpi]")
     REQUIRE(statusB.bytesSize == sizeB * sizeof(int));
 
     // Receive the next message
-    auto bufferB = new int[sizeB];
+    auto bufferBAllocation = std::make_unique<int[]>(sizeB);
+    auto bufferB = bufferBAllocation.get();
     world.recv(1, 2, BYTES(bufferB), MPI_INT, sizeB * sizeof(int), nullptr);
 }
 
diff --git a/tests/test/scheduler/test_remote_mpi_worlds.cpp b/tests/test/scheduler/test_remote_mpi_worlds.cpp
index edf5db2d3..63f6ff4d7 100644
--- a/tests/test/scheduler/test_remote_mpi_worlds.cpp
+++ b/tests/test/scheduler/test_remote_mpi_worlds.cpp
@@ -107,7 +107,8 @@ TEST_CASE_METHOD(RemoteMpiTestFixture, "Test send across hosts", "[mpi]")
 
         // Receive the message for the given rank
         MPI_Status status{};
-        auto buffer = new int[messageData.size()];
+        auto bufferAllocation = std::make_unique<int[]>(messageData.size());
+        auto buffer = bufferAllocation.get();
         otherWorld.recv(
           rankA, rankB, BYTES(buffer), MPI_INT, messageData.size(), &status);
 
@@ -160,7 +161,8 @@ TEST_CASE_METHOD(RemoteMpiTestFixture,
                           messageData.size());
 
           // Now recv
-          auto buffer = new int[messageData2.size()];
+          auto bufferAllocation = std::make_unique<int[]>(messageData2.size());
+          auto buffer = bufferAllocation.get();
           otherWorld.recv(rankA,
                           rankB,
                           BYTES(buffer),
@@ -177,7 +179,8 @@ TEST_CASE_METHOD(RemoteMpiTestFixture,
 
     // Receive the message for the given rank
     MPI_Status status{};
-    auto buffer = new int[messageData.size()];
+    auto bufferAllocation = std::make_unique<int[]>(messageData.size());
+    auto buffer = bufferAllocation.get();
     thisWorld.recv(
       rankB, rankA, BYTES(buffer), MPI_INT, messageData.size(), &status);
     std::vector<int> actual(buffer, buffer + messageData.size());
@@ -745,7 +748,8 @@ TEST_CASE_METHOD(RemoteMpiTestFixture,
           otherWorld.initialiseFromMsg(msg);
 
           // Recv once
-          auto buffer = new int[messageData.size()];
+          auto bufferAllocation = std::make_unique<int[]>(messageData.size());
+          auto buffer = bufferAllocation.get();
           otherWorld.recv(rankA,
                           rankB,
                           BYTES(buffer),
@@ -756,7 +760,8 @@ TEST_CASE_METHOD(RemoteMpiTestFixture,
           assert(actual == messageData);
 
           // Recv a second time
-          auto buffer2 = new int[messageData2.size()];
+          auto buffer2Allocation = std::make_unique<int[]>(messageData2.size());
+          auto buffer2 = buffer2Allocation.get();
           otherWorld.recv(rankA,
                           rankB,
                           BYTES(buffer2),
@@ -798,7 +803,8 @@ TEST_CASE_METHOD(RemoteMpiTestFixture,
     REQUIRE(endpointCheck == expectedEndpoints);
 
     // Finally recv a messge, the same endpoint should be used again
-    auto buffer = new int[messageData.size()];
+    auto bufferAllocation = std::make_unique<int[]>(messageData.size());
+    auto buffer = bufferAllocation.get();
     thisWorld.recv(rankB,
                    rankA,
                    BYTES(buffer),
@@ -871,7 +877,8 @@ TEST_CASE_METHOD(RemoteMpiTestFixture, "Test UMB creation", "[mpi]")
                                        false, true,  false, false };
 
     // Irecv a messge from one rank, another UMB should be created
-    auto buffer1 = new int[messageData.size()];
+    auto buffer1Allocation = std::make_unique<int[]>(messageData.size());
+    auto buffer1 = buffer1Allocation.get();
     int recvId1 = thisWorld.irecv(otherWorldRank1,
                                   thisWorldRank,
                                   BYTES(buffer1),
@@ -883,7 +890,8 @@ TEST_CASE_METHOD(RemoteMpiTestFixture, "Test UMB creation", "[mpi]")
     REQUIRE(umbCheck == expectedUmb1);
 
     // Irecv a messge from another rank, another UMB should be created
-    auto buffer2 = new int[messageData.size()];
+    auto buffer2Allocation = std::make_unique<int[]>(messageData.size());
+    auto buffer2 = buffer2Allocation.get();
     int recvId2 = thisWorld.irecv(otherWorldRank2,
                                   thisWorldRank,
                                   BYTES(buffer2),
diff --git a/tests/test/scheduler/test_scheduler.cpp b/tests/test/scheduler/test_scheduler.cpp
index 69c683119..1298158c4 100644
--- a/tests/test/scheduler/test_scheduler.cpp
+++ b/tests/test/scheduler/test_scheduler.cpp
@@ -226,9 +226,11 @@ TEST_CASE_METHOD(SlowExecutorFixture, "Test batch scheduling", "[scheduler]")
     faabric::snapshot::SnapshotRegistry& snapRegistry =
       faabric::snapshot::getSnapshotRegistry();
 
+    std::unique_ptr<uint8_t[]> snapshotDataAllocation;
     if (!expectedSnapshot.empty()) {
         snapshot.size = 1234;
-        snapshot.data = new uint8_t[snapshot.size];
+        snapshotDataAllocation = std::make_unique<uint8_t[]>(snapshot.size);
+        snapshot.data = snapshotDataAllocation.get();
 
         snapRegistry.takeSnapshot(expectedSnapshot, snapshot);
     }
@@ -420,9 +422,11 @@ TEST_CASE_METHOD(SlowExecutorFixture,
     faabric::snapshot::SnapshotRegistry& snapRegistry =
       faabric::snapshot::getSnapshotRegistry();
 
+    std::unique_ptr<uint8_t[]> snapshotDataAllocation;
     if (!expectedSnapshot.empty()) {
         snapshot.size = 1234;
-        snapshot.data = new uint8_t[snapshot.size];
+        snapshotDataAllocation = std::make_unique<uint8_t[]>(snapshot.size);
+        snapshot.data = snapshotDataAllocation.get();
         snapRegistry.takeSnapshot(expectedSnapshot, snapshot);
     }
 

From 5b855f579cc32a082ca8e3639c36d81151b7fae4 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 15:49:11 +0000
Subject: [PATCH 21/48] Add a TSan ignore list

---
 .github/workflows/sanitisers.yml | 2 +-
 CMakeLists.txt                   | 2 +-
 thread-sanitizer-ignorelist.txt  | 4 ++++
 3 files changed, 6 insertions(+), 2 deletions(-)
 create mode 100644 thread-sanitizer-ignorelist.txt

diff --git a/.github/workflows/sanitisers.yml b/.github/workflows/sanitisers.yml
index dffb556b5..cf4b78071 100644
--- a/.github/workflows/sanitisers.yml
+++ b/.github/workflows/sanitisers.yml
@@ -134,7 +134,7 @@ jobs:
         run: ./bin/faabric_tests > /dev/null
         working-directory: /build/faabric/static
         env:
-          TSAN_OPTIONS: "verbosity=1:halt_on_error=1"
+          TSAN_OPTIONS: "verbosity=1:halt_on_error=1:suppressions=/code/faabric/thread-sanitizer-ignorelist.txt"
 
   undefined-sanitiser:
     if: github.event.pull_request.draft == false
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5710565e1..7f361fa15 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,7 +26,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 if (FAABRIC_USE_SANITISER STREQUAL "Address")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
 elseif (FAABRIC_USE_SANITISER STREQUAL "Thread")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread -fsanitize-ignorelist=${CMAKE_CURRENT_LIST_DIR}/thread-sanitizer-ignorelist.txt")
 elseif (FAABRIC_USE_SANITISER STREQUAL "Undefined")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
 elseif (FAABRIC_USE_SANITISER STREQUAL "Leak")
diff --git a/thread-sanitizer-ignorelist.txt b/thread-sanitizer-ignorelist.txt
new file mode 100644
index 000000000..22cdca76e
--- /dev/null
+++ b/thread-sanitizer-ignorelist.txt
@@ -0,0 +1,4 @@
+# TSan detects a race on pistache shutdown
+race:*Pistache*
+race:close
+race:socket
\ No newline at end of file

From 0a921ade3767bd6fd149cf190d4f5507c1994653 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 15:50:32 +0000
Subject: [PATCH 22/48] TSan: Fix data race in Scheduler testing code

---
 src/scheduler/Scheduler.cpp | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/scheduler/Scheduler.cpp b/src/scheduler/Scheduler.cpp
index 97f50d556..dd9b5d717 100644
--- a/src/scheduler/Scheduler.cpp
+++ b/src/scheduler/Scheduler.cpp
@@ -490,6 +490,14 @@ faabric::util::SchedulingDecision Scheduler::doCallFunctions(
     // EXECTUION
     // -------------------------------------------
 
+    // Records for tests - copy messages before execution to avoid racing on msg
+    size_t recordedMessagesOffset = recordedMessagesAll.size();
+    if (faabric::util::isTestMode()) {
+        for (int i = 0; i < nMessages; i++) {
+            recordedMessagesAll.emplace_back(req->messages().at(i));
+        }
+    }
+
     // Iterate through unique hosts and dispatch messages
     for (const std::string& host : orderedHosts) {
         // Work out which indexes are scheduled on this host
@@ -601,12 +609,12 @@ faabric::util::SchedulingDecision Scheduler::doCallFunctions(
     if (faabric::util::isTestMode()) {
         for (int i = 0; i < nMessages; i++) {
             std::string executedHost = decision.hosts.at(i);
-            faabric::Message msg = req->messages().at(i);
+            const faabric::Message& msg =
+              recordedMessagesAll.at(recordedMessagesOffset + i);
 
             // Log results if in test mode
-            recordedMessagesAll.push_back(msg);
             if (executedHost.empty() || executedHost == thisHost) {
-                recordedMessagesLocal.push_back(msg);
+                recordedMessagesLocal.emplace_back(msg);
             } else {
                 recordedMessagesShared.emplace_back(executedHost, msg);
             }

From 487791e6edefeeecfa110e0892d52f7071e532e2 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 15:52:42 +0000
Subject: [PATCH 23/48] Ignore zeromq races

---
 thread-sanitizer-ignorelist.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/thread-sanitizer-ignorelist.txt b/thread-sanitizer-ignorelist.txt
index 22cdca76e..63e864490 100644
--- a/thread-sanitizer-ignorelist.txt
+++ b/thread-sanitizer-ignorelist.txt
@@ -1,4 +1,6 @@
 # TSan detects a race on pistache shutdown
 race:*Pistache*
 race:close
-race:socket
\ No newline at end of file
+race:socket
+# Ignore ZeroMQ races
+race:zmq::*

From aef1cb3bd206ef7abb5b4192d36c028dcd9e9446 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 16:06:11 +0000
Subject: [PATCH 24/48] TSan: fix shared_ptr data races (in a shared_ptr the
 counter is atomic, but ptr is not, and requires explicit atomic accesses)

C++20 has a different api for this, but not yet supported by libstdc++
---
 src/transport/MessageEndpointServer.cpp | 48 ++++++++++++++++++-------
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/src/transport/MessageEndpointServer.cpp b/src/transport/MessageEndpointServer.cpp
index 05fb77735..60a769b24 100644
--- a/src/transport/MessageEndpointServer.cpp
+++ b/src/transport/MessageEndpointServer.cpp
@@ -6,8 +6,10 @@
 #include <faabric/util/macros.h>
 #include <faabric/util/network.h>
 
+#include <atomic>
 #include <csignal>
 #include <cstdlib>
+#include <memory>
 
 namespace faabric::transport {
 
@@ -149,10 +151,12 @@ void MessageEndpointServerHandler::start(
                         }
 
                         // Wait on the request latch if necessary
-                        if (server->requestLatch != nullptr) {
+                        auto requestLatch = std::atomic_load_explicit(
+                          &server->requestLatch, std::memory_order_acquire);
+                        if (requestLatch != nullptr) {
                             SPDLOG_TRACE(
                               "Server thread waiting on worker latch");
-                            server->requestLatch->wait();
+                            requestLatch->wait();
                         }
                     }
                 }
@@ -162,10 +166,12 @@ void MessageEndpointServerHandler::start(
 
                 // Just before the thread dies, check if there's something
                 // waiting on the shutdown latch
-                if (server->shutdownLatch != nullptr) {
+                auto shutdownLatch = std::atomic_load_explicit(
+                  &server->shutdownLatch, std::memory_order_acquire);
+                if (shutdownLatch != nullptr) {
                     SPDLOG_TRACE("Server thread {} waiting on shutdown latch",
                                  i);
-                    server->shutdownLatch->wait();
+                    shutdownLatch->wait();
                 }
             });
         }
@@ -268,12 +274,18 @@ void MessageEndpointServer::stop()
                      nThreads,
                      asyncPort);
 
-        shutdownLatch = faabric::util::Latch::create(2);
+        std::atomic_store_explicit(&shutdownLatch,
+                                   faabric::util::Latch::create(2),
+                                   std::memory_order_release);
 
         asyncShutdownSender.send(shutdownHeader.data(), shutdownHeader.size());
 
-        shutdownLatch->wait();
-        shutdownLatch = nullptr;
+        std::atomic_load_explicit(&shutdownLatch, std::memory_order_acquire)
+          ->wait();
+        std::atomic_store_explicit(
+          &shutdownLatch,
+          std::shared_ptr<faabric::util::Latch>(nullptr),
+          std::memory_order_release);
     }
 
     for (int i = 0; i < nThreads; i++) {
@@ -282,13 +294,19 @@ void MessageEndpointServer::stop()
                      nThreads,
                      syncPort);
 
-        shutdownLatch = faabric::util::Latch::create(2);
+        std::atomic_store_explicit(&shutdownLatch,
+                                   faabric::util::Latch::create(2),
+                                   std::memory_order_release);
 
         syncShutdownSender.sendAwaitResponse(shutdownHeader.data(),
                                              shutdownHeader.size());
 
-        shutdownLatch->wait();
-        shutdownLatch = nullptr;
+        std::atomic_load_explicit(&shutdownLatch, std::memory_order_acquire)
+          ->wait();
+        std::atomic_store_explicit(
+          &shutdownLatch,
+          std::shared_ptr<faabric::util::Latch>(nullptr),
+          std::memory_order_release);
     }
 
     // Join the handlers
@@ -305,16 +323,20 @@ void MessageEndpointServer::onWorkerStop()
 
 void MessageEndpointServer::setRequestLatch()
 {
-    requestLatch = faabric::util::Latch::create(2);
+    std::atomic_store_explicit(&requestLatch,
+                               faabric::util::Latch::create(2),
+                               std::memory_order_release);
 }
 
 void MessageEndpointServer::awaitRequestLatch()
 {
     SPDLOG_TRACE("Waiting on worker latch for port {}", asyncPort);
-    requestLatch->wait();
+    std::atomic_load_explicit(&requestLatch, std::memory_order_acquire)->wait();
 
     SPDLOG_TRACE("Finished worker latch for port {}", asyncPort);
-    requestLatch = nullptr;
+    std::atomic_store_explicit(&requestLatch,
+                               std::shared_ptr<faabric::util::Latch>(nullptr),
+                               std::memory_order_release);
 }
 
 int MessageEndpointServer::getNThreads()

From 680e55dafc31c639cb4c5bd74ba03b9ec02aceca Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 16:15:51 +0000
Subject: [PATCH 25/48] Ignore config change data races, as they could only
 affect tests

---
 thread-sanitizer-ignorelist.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/thread-sanitizer-ignorelist.txt b/thread-sanitizer-ignorelist.txt
index 63e864490..6a00fc67c 100644
--- a/thread-sanitizer-ignorelist.txt
+++ b/thread-sanitizer-ignorelist.txt
@@ -4,3 +4,5 @@ race:close
 race:socket
 # Ignore ZeroMQ races
 race:zmq::*
+# Config only changes in tests, and in places where being slightly racy doesn't matter
+race:faabric::util::SystemConfig::*
\ No newline at end of file

From 37f1eace6d5c6f5eb1478e2a2d64fd436a417b53 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 16:19:24 +0000
Subject: [PATCH 26/48] TSan: Missing lock on scheduler reset

---
 src/scheduler/Scheduler.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/scheduler/Scheduler.cpp b/src/scheduler/Scheduler.cpp
index dd9b5d717..3fc657886 100644
--- a/src/scheduler/Scheduler.cpp
+++ b/src/scheduler/Scheduler.cpp
@@ -90,6 +90,7 @@ void Scheduler::resetThreadLocalCache()
 
 void Scheduler::reset()
 {
+    faabric::util::FullLock lock(mx);
     SPDLOG_DEBUG("Resetting scheduler");
 
     resetThreadLocalCache();

From ea910c6beb990bb4a02eb6f40a8f6bce32263dc3 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 16:55:33 +0000
Subject: [PATCH 27/48] Ignore catch2 bad signal handler errors in tsan

---
 thread-sanitizer-ignorelist.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/thread-sanitizer-ignorelist.txt b/thread-sanitizer-ignorelist.txt
index 6a00fc67c..0bbec8150 100644
--- a/thread-sanitizer-ignorelist.txt
+++ b/thread-sanitizer-ignorelist.txt
@@ -5,4 +5,6 @@ race:socket
 # Ignore ZeroMQ races
 race:zmq::*
 # Config only changes in tests, and in places where being slightly racy doesn't matter
-race:faabric::util::SystemConfig::*
\ No newline at end of file
+race:faabric::util::SystemConfig::*
+# Catch2 allocates in its signal handler, this prevents showing the wrong crash report
+signal:*

From 745c8d682f7b5dc54b2a9cefb2078c12d5e538cb Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 16:55:53 +0000
Subject: [PATCH 28/48] TSan: Fix racy executor cleanup/deadlock

---
 src/scheduler/Executor.cpp  | 38 ++++++++++++++++++++++---------------
 src/scheduler/Scheduler.cpp | 20 +++++++++++++++----
 2 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/src/scheduler/Executor.cpp b/src/scheduler/Executor.cpp
index 54443a0c1..edcc70108 100644
--- a/src/scheduler/Executor.cpp
+++ b/src/scheduler/Executor.cpp
@@ -66,14 +66,18 @@ void Executor::finish()
         threadTaskQueues[i].enqueue(
           ExecutorTask(POOL_SHUTDOWN, nullptr, nullptr, false, false));
 
+        faabric::util::UniqueLock threadsLock(threadsMutex);
+        // copy shared_ptr to avoid racing
+        auto thread = threadPoolThreads.at(i);
         // If already killed, move to the next thread
-        if (threadPoolThreads.at(i) == nullptr) {
+        if (thread == nullptr) {
             continue;
         }
 
         // Await the thread
-        if (threadPoolThreads.at(i)->joinable()) {
-            threadPoolThreads.at(i)->join();
+        if (thread->joinable()) {
+            threadsLock.unlock();
+            thread->join();
         }
     }
 
@@ -94,6 +98,7 @@ void Executor::finish()
 
     threadPoolThreads.clear();
     threadTaskQueues.clear();
+    deadThreads.clear();
 }
 
 void Executor::executeTasks(std::vector<int> msgIdxs,
@@ -342,19 +347,22 @@ void Executor::threadPoolThread(int threadPoolIdx)
 
         // Note - we have to keep a record of dead threads so we can join them
         // all when the executor shuts down
-        std::shared_ptr<std::thread> thisThread =
-          threadPoolThreads.at(threadPoolIdx);
-        deadThreads.emplace_back(thisThread);
-
-        // Set this thread to nullptr
-        threadPoolThreads.at(threadPoolIdx) = nullptr;
-
-        // See if any threads are still running
         bool isFinished = true;
-        for (auto t : threadPoolThreads) {
-            if (t != nullptr) {
-                isFinished = false;
-                break;
+        {
+            faabric::util::UniqueLock threadsLock(threadsMutex);
+            std::shared_ptr<std::thread> thisThread =
+              threadPoolThreads.at(threadPoolIdx);
+            deadThreads.emplace_back(thisThread);
+
+            // Set this thread to nullptr
+            threadPoolThreads.at(threadPoolIdx) = nullptr;
+
+            // See if any threads are still running
+            for (auto t : threadPoolThreads) {
+                if (t != nullptr) {
+                    isFinished = false;
+                    break;
+                }
             }
         }
 
diff --git a/src/scheduler/Scheduler.cpp b/src/scheduler/Scheduler.cpp
index 3fc657886..7e698d2e9 100644
--- a/src/scheduler/Scheduler.cpp
+++ b/src/scheduler/Scheduler.cpp
@@ -90,22 +90,34 @@ void Scheduler::resetThreadLocalCache()
 
 void Scheduler::reset()
 {
-    faabric::util::FullLock lock(mx);
     SPDLOG_DEBUG("Resetting scheduler");
 
     resetThreadLocalCache();
 
     // Shut down all Executors
-    for (auto& p : executors) {
+    // Executor shutdown takes a lock itself, so "finish" executors without the
+    // lock.
+    decltype(executors) executorsList;
+    decltype(deadExecutors) deadExecutorsList;
+    {
+        faabric::util::FullLock lock(mx);
+        executorsList = executors;
+    }
+    for (auto& p : executorsList) {
         for (auto& e : p.second) {
             e->finish();
         }
     }
-
+    executorsList.clear();
+    {
+        faabric::util::FullLock lock(mx);
+        deadExecutorsList = deadExecutors;
+    }
     for (auto& e : deadExecutors) {
         e->finish();
     }
-
+    deadExecutorsList.clear();
+    faabric::util::FullLock lock(mx);
     executors.clear();
     deadExecutors.clear();
 

From 72b9b3a84a91e47a8103cd9d5786157a620668f3 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 17:02:15 +0000
Subject: [PATCH 29/48] Missing locks in SnapshotClient mock code

---
 src/snapshot/SnapshotClient.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/snapshot/SnapshotClient.cpp b/src/snapshot/SnapshotClient.cpp
index 0f2009b58..c741d5408 100644
--- a/src/snapshot/SnapshotClient.cpp
+++ b/src/snapshot/SnapshotClient.cpp
@@ -29,27 +29,32 @@ static std::vector<std::pair<std::string, std::pair<uint32_t, int>>>
 std::vector<std::pair<std::string, faabric::util::SnapshotData>>
 getSnapshotPushes()
 {
+    faabric::util::UniqueLock lock(mockMutex);
     return snapshotPushes;
 }
 
 std::vector<std::pair<std::string, std::vector<faabric::util::SnapshotDiff>>>
 getSnapshotDiffPushes()
 {
+    faabric::util::UniqueLock lock(mockMutex);
     return snapshotDiffPushes;
 }
 
 std::vector<std::pair<std::string, std::string>> getSnapshotDeletes()
 {
+    faabric::util::UniqueLock lock(mockMutex);
     return snapshotDeletes;
 }
 
 std::vector<std::pair<std::string, std::pair<uint32_t, int>>> getThreadResults()
 {
+    faabric::util::UniqueLock lock(mockMutex);
     return threadResults;
 }
 
 void clearMockSnapshotRequests()
 {
+    faabric::util::UniqueLock lock(mockMutex);
     snapshotPushes.clear();
     snapshotDiffPushes.clear();
     snapshotDeletes.clear();

From 935dbe5e42914e5429352c4e3d101806bd415117 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 17:02:31 +0000
Subject: [PATCH 30/48] MockMode needs to be atomic to work across threads
 without a mutex

---
 src/util/testing.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/util/testing.cpp b/src/util/testing.cpp
index 8acb052f8..b0036885b 100644
--- a/src/util/testing.cpp
+++ b/src/util/testing.cpp
@@ -1,25 +1,27 @@
 #include <faabric/util/testing.h>
 
+#include <atomic>
+
 namespace faabric::util {
-static bool testMode = false;
-static bool mockMode = false;
+static std::atomic_bool testMode = false;
+static std::atomic_bool mockMode = false;
 
 void setTestMode(bool val)
 {
-    testMode = val;
+    testMode.store(val, std::memory_order_release);
 }
 
 bool isTestMode()
 {
-    return testMode;
+    return testMode.load(std::memory_order_acquire);
 }
 void setMockMode(bool val)
 {
-    mockMode = val;
+    mockMode.store(val, std::memory_order_release);
 }
 
 bool isMockMode()
 {
-    return mockMode;
+    return mockMode.load(std::memory_order_acquire);
 }
 }

From feba949a833f9dd1ba754018cff402abe2d092de Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 17:05:18 +0000
Subject: [PATCH 31/48] Missing mutexes in FunctionCallClient mock code

---
 src/scheduler/FunctionCallClient.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/scheduler/FunctionCallClient.cpp b/src/scheduler/FunctionCallClient.cpp
index c37f3c174..db7490ef5 100644
--- a/src/scheduler/FunctionCallClient.cpp
+++ b/src/scheduler/FunctionCallClient.cpp
@@ -33,11 +33,13 @@ static std::vector<std::pair<std::string, faabric::UnregisterRequest>>
 
 std::vector<std::pair<std::string, faabric::Message>> getFunctionCalls()
 {
+    faabric::util::UniqueLock lock(mockMutex);
     return functionCalls;
 }
 
 std::vector<std::pair<std::string, faabric::EmptyRequest>> getFlushCalls()
 {
+    faabric::util::UniqueLock lock(mockMutex);
     return flushCalls;
 }
 
@@ -45,27 +47,32 @@ std::vector<
   std::pair<std::string, std::shared_ptr<faabric::BatchExecuteRequest>>>
 getBatchRequests()
 {
+    faabric::util::UniqueLock lock(mockMutex);
     return batchMessages;
 }
 
 std::vector<std::pair<std::string, faabric::EmptyRequest>> getResourceRequests()
 {
+    faabric::util::UniqueLock lock(mockMutex);
     return resourceRequests;
 }
 
 std::vector<std::pair<std::string, faabric::UnregisterRequest>>
 getUnregisterRequests()
 {
+    faabric::util::UniqueLock lock(mockMutex);
     return unregisterRequests;
 }
 
 void queueResourceResponse(const std::string& host, faabric::HostResources& res)
 {
+    faabric::util::UniqueLock lock(mockMutex);
     queuedResourceResponses[host].enqueue(res);
 }
 
 void clearMockRequests()
 {
+    faabric::util::UniqueLock lock(mockMutex);
     functionCalls.clear();
     batchMessages.clear();
     resourceRequests.clear();

From ad6265eeba9e33b15480841c0485b46c503eb6f6 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 17:25:15 +0000
Subject: [PATCH 32/48] More missing locks on reads in the scheduler

---
 include/faabric/scheduler/Scheduler.h |  3 ++-
 src/scheduler/Scheduler.cpp           | 19 +++++++++++++++++--
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/include/faabric/scheduler/Scheduler.h b/include/faabric/scheduler/Scheduler.h
index f1355f02b..1468040f9 100644
--- a/include/faabric/scheduler/Scheduler.h
+++ b/include/faabric/scheduler/Scheduler.h
@@ -123,7 +123,8 @@ class Scheduler
     int getFunctionRegisteredHostCount(const faabric::Message& msg);
 
     std::set<std::string> getFunctionRegisteredHosts(
-      const faabric::Message& msg);
+      const faabric::Message& msg,
+      bool acquireLock = true);
 
     void broadcastFlush();
 
diff --git a/src/scheduler/Scheduler.cpp b/src/scheduler/Scheduler.cpp
index 7e698d2e9..4ae66fa23 100644
--- a/src/scheduler/Scheduler.cpp
+++ b/src/scheduler/Scheduler.cpp
@@ -149,19 +149,26 @@ void Scheduler::shutdown()
 
 long Scheduler::getFunctionExecutorCount(const faabric::Message& msg)
 {
+    faabric::util::SharedLock lock(mx);
     const std::string funcStr = faabric::util::funcToString(msg, false);
     return executors[funcStr].size();
 }
 
 int Scheduler::getFunctionRegisteredHostCount(const faabric::Message& msg)
 {
+    faabric::util::SharedLock lock(mx);
     const std::string funcStr = faabric::util::funcToString(msg, false);
     return (int)registeredHosts[funcStr].size();
 }
 
 std::set<std::string> Scheduler::getFunctionRegisteredHosts(
-  const faabric::Message& msg)
+  const faabric::Message& msg,
+  bool acquireLock)
 {
+    faabric::util::SharedLock lock;
+    if (acquireLock) {
+        lock = faabric::util::SharedLock(mx);
+    }
     const std::string funcStr = faabric::util::funcToString(msg, false);
     return registeredHosts[funcStr];
 }
@@ -169,6 +176,7 @@ std::set<std::string> Scheduler::getFunctionRegisteredHosts(
 void Scheduler::removeRegisteredHost(const std::string& host,
                                      const faabric::Message& msg)
 {
+    faabric::util::FullLock lock(mx);
     const std::string funcStr = faabric::util::funcToString(msg, false);
     registeredHosts[funcStr].erase(host);
 }
@@ -476,7 +484,7 @@ faabric::util::SchedulingDecision Scheduler::doCallFunctions(
     // diffs.
     std::string snapshotKey = firstMsg.snapshotkey();
     if (!snapshotKey.empty()) {
-        for (const auto& host : getFunctionRegisteredHosts(firstMsg)) {
+        for (const auto& host : getFunctionRegisteredHosts(firstMsg, false)) {
             SnapshotClient& c = getSnapshotClient(host);
             faabric::util::SnapshotData snapshotData =
               faabric::snapshot::getSnapshotRegistry().getSnapshot(snapshotKey);
@@ -697,6 +705,7 @@ void Scheduler::callFunction(faabric::Message& msg, bool forceLocal)
 
 void Scheduler::clearRecordedMessages()
 {
+    faabric::util::FullLock lock(mx);
     recordedMessagesAll.clear();
     recordedMessagesLocal.clear();
     recordedMessagesShared.clear();
@@ -704,11 +713,13 @@ void Scheduler::clearRecordedMessages()
 
 std::vector<faabric::Message> Scheduler::getRecordedMessagesAll()
 {
+    faabric::util::SharedLock lock(mx);
     return recordedMessagesAll;
 }
 
 std::vector<faabric::Message> Scheduler::getRecordedMessagesLocal()
 {
+    faabric::util::SharedLock lock(mx);
     return recordedMessagesLocal;
 }
 
@@ -736,6 +747,7 @@ SnapshotClient& Scheduler::getSnapshotClient(const std::string& otherHost)
 std::vector<std::pair<std::string, faabric::Message>>
 Scheduler::getRecordedMessagesShared()
 {
+    faabric::util::SharedLock lock(mx);
     return recordedMessagesShared;
 }
 
@@ -783,11 +795,13 @@ std::shared_ptr<Executor> Scheduler::claimExecutor(
 
 std::string Scheduler::getThisHost()
 {
+    faabric::util::SharedLock lock(mx);
     return thisHost;
 }
 
 void Scheduler::broadcastFlush()
 {
+    faabric::util::FullLock lock(mx);
     // Get all hosts
     std::set<std::string> allHosts = getAvailableHosts();
 
@@ -799,6 +813,7 @@ void Scheduler::broadcastFlush()
         getFunctionCallClient(otherHost).sendFlush();
     }
 
+    lock.unlock();
     flushLocally();
 }
 

From e87360c703af9c0964a4cfce357511be5f4afe51 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 17:30:58 +0000
Subject: [PATCH 33/48] Missing locks on *ThisHostResources in Scheduler

---
 src/scheduler/Scheduler.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/scheduler/Scheduler.cpp b/src/scheduler/Scheduler.cpp
index 4ae66fa23..01f26c3f3 100644
--- a/src/scheduler/Scheduler.cpp
+++ b/src/scheduler/Scheduler.cpp
@@ -983,13 +983,16 @@ faabric::Message Scheduler::getFunctionResult(unsigned int messageId,
 
 faabric::HostResources Scheduler::getThisHostResources()
 {
-    thisHostResources.set_usedslots(
+    faabric::util::SharedLock lock(mx);
+    faabric::HostResources hostResources = thisHostResources;
+    hostResources.set_usedslots(
       this->thisHostUsedSlots.load(std::memory_order_acquire));
-    return thisHostResources;
+    return hostResources;
 }
 
 void Scheduler::setThisHostResources(faabric::HostResources& res)
 {
+    faabric::util::FullLock lock(mx);
     thisHostResources = res;
     this->thisHostUsedSlots.store(res.usedslots(), std::memory_order_release);
 }

From 0d7d73b39b6100a2a21a3518e998315bff4adc64 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 17:53:04 +0000
Subject: [PATCH 34/48] Skip mpi::allGather as I don't know what this exactly
 does

---
 thread-sanitizer-ignorelist.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/thread-sanitizer-ignorelist.txt b/thread-sanitizer-ignorelist.txt
index 0bbec8150..893bb6784 100644
--- a/thread-sanitizer-ignorelist.txt
+++ b/thread-sanitizer-ignorelist.txt
@@ -8,3 +8,6 @@ race:zmq::*
 race:faabric::util::SystemConfig::*
 # Catch2 allocates in its signal handler, this prevents showing the wrong crash report
 signal:*
+
+# TODO: Remove: There's something weird going on with MPI allGather
+race:faabric::scheduler::MpiWorld::allGather

From 54ece019f44d86a519b5f87623be174c8c4e789e Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 18:14:13 +0000
Subject: [PATCH 35/48] Catch2 REQUIRE is not thread-safe, use assert

---
 tests/test/scheduler/test_remote_mpi_worlds.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test/scheduler/test_remote_mpi_worlds.cpp b/tests/test/scheduler/test_remote_mpi_worlds.cpp
index 63f6ff4d7..eb2a90eac 100644
--- a/tests/test/scheduler/test_remote_mpi_worlds.cpp
+++ b/tests/test/scheduler/test_remote_mpi_worlds.cpp
@@ -170,7 +170,7 @@ TEST_CASE_METHOD(RemoteMpiTestFixture,
                           messageData2.size(),
                           MPI_STATUS_IGNORE);
           std::vector<int> actual(buffer, buffer + messageData2.size());
-          REQUIRE(actual == messageData2);
+          assert(actual == messageData2);
 
           testLatch->wait();
 

From 12b3d3b0ae76b653c3493b93680f44da4f9ee47e Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 19:11:13 +0000
Subject: [PATCH 36/48] TSan snapshot fixes

---
 include/faabric/snapshot/SnapshotServer.h     |  6 +++++
 src/snapshot/SnapshotServer.cpp               |  7 ++++++
 .../snapshot/test_snapshot_client_server.cpp  | 22 +++++++++++++++++--
 thread-sanitizer-ignorelist.txt               |  4 ++--
 4 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/include/faabric/snapshot/SnapshotServer.h b/include/faabric/snapshot/SnapshotServer.h
index 3d79cb98d..4f95c4602 100644
--- a/include/faabric/snapshot/SnapshotServer.h
+++ b/include/faabric/snapshot/SnapshotServer.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <atomic>
+
 #include <faabric/flat/faabric_generated.h>
 #include <faabric/scheduler/Scheduler.h>
 #include <faabric/snapshot/SnapshotApi.h>
@@ -12,6 +14,9 @@ class SnapshotServer final : public faabric::transport::MessageEndpointServer
   public:
     SnapshotServer();
 
+    // Returns how many diffs have been applied since started, useful for testing
+    size_t diffsApplied() const;
+
   protected:
     void doAsyncRecv(int header,
                      const uint8_t* buffer,
@@ -34,5 +39,6 @@ class SnapshotServer final : public faabric::transport::MessageEndpointServer
 
   private:
     faabric::transport::PointToPointBroker& broker;
+    std::atomic_size_t diffsAppliedCounter;
 };
 }
diff --git a/src/snapshot/SnapshotServer.cpp b/src/snapshot/SnapshotServer.cpp
index 108e3c5d6..a14ce5aef 100644
--- a/src/snapshot/SnapshotServer.cpp
+++ b/src/snapshot/SnapshotServer.cpp
@@ -22,6 +22,10 @@ SnapshotServer::SnapshotServer()
   , broker(faabric::transport::getPointToPointBroker())
 {}
 
+size_t SnapshotServer::diffsApplied() const {
+    return diffsAppliedCounter.load(std::memory_order_acquire);
+}
+
 void SnapshotServer::doAsyncRecv(int header,
                                  const uint8_t* buffer,
                                  size_t bufferSize)
@@ -196,6 +200,9 @@ SnapshotServer::recvPushSnapshotDiffs(const uint8_t* buffer, size_t bufferSize)
                 throw std::runtime_error("Unsupported merge data type");
             }
         }
+        // make changes visible to other threads
+        std::atomic_thread_fence(std::memory_order_release);
+        this->diffsAppliedCounter.fetch_add(1, std::memory_order_acq_rel);
     }
 
     // Unlock group if exists
diff --git a/tests/test/snapshot/test_snapshot_client_server.cpp b/tests/test/snapshot/test_snapshot_client_server.cpp
index cd52fd883..a2097e1c2 100644
--- a/tests/test/snapshot/test_snapshot_client_server.cpp
+++ b/tests/test/snapshot/test_snapshot_client_server.cpp
@@ -149,6 +149,8 @@ TEST_CASE_METHOD(SnapshotClientServerFixture,
     std::vector<SnapshotDiff> diffsA;
     std::vector<SnapshotDiff> diffsB;
 
+    size_t originalDiffsApplied = server.diffsApplied();
+
     SnapshotDiff diffA1(SnapshotDataType::Raw,
                         SnapshotMergeOperation::Overwrite,
                         5,
@@ -172,6 +174,10 @@ TEST_CASE_METHOD(SnapshotClientServerFixture,
     diffsB = { diffB };
     cli.pushSnapshotDiffs(snapKey, groupIdB, diffsB);
 
+    // Ensure the right number of diffs is applied
+    // Also acts as a memory barrier for TSan
+    REQUIRE(server.diffsApplied() == originalDiffsApplied + 3);
+
     // Check changes have been applied
     checkDiffsApplied(snap.data, diffsA);
     checkDiffsApplied(snap.data, diffsB);
@@ -223,9 +229,15 @@ TEST_CASE_METHOD(SnapshotClientServerFixture,
     diffA2.operation = SnapshotMergeOperation::Sum;
     diffA2.dataType = SnapshotDataType::Int;
 
+    size_t originalDiffsApplied = server.diffsApplied();
+
     diffs = { diffA1, diffA2 };
     cli.pushSnapshotDiffs(snapKey, 0, diffs);
 
+    // Ensure the right number of diffs is applied
+    // Also acts as a memory barrier for TSan
+    REQUIRE(server.diffsApplied() == originalDiffsApplied + 2);
+
     // Check diffs have been applied according to the merge operations
     REQUIRE(*basePtrA1 == baseA1 + diffIntA1);
     REQUIRE(*basePtrA2 == baseA2 + diffIntA2);
@@ -337,9 +349,15 @@ TEST_CASE_METHOD(SnapshotClientServerFixture,
     diff.operation = operation;
     diff.dataType = dataType;
 
+    size_t originalDiffsApplied = server.diffsApplied();
+
     std::vector<SnapshotDiff> diffs = { diff };
     cli.pushSnapshotDiffs(snapKey, 0, diffs);
 
+    // Ensure the right number of diffs is applied
+    // Also acts as a memory barrier for TSan
+    REQUIRE(server.diffsApplied() == originalDiffsApplied + 1);
+
     // Check data is as expected
     std::vector<uint8_t> actualData(snap.data + offset,
                                     snap.data + offset + expectedData.size());
@@ -367,13 +385,13 @@ TEST_CASE_METHOD(SnapshotClientServerFixture,
     std::thread tA([threadIdA, returnValueA] {
         faabric::scheduler::Scheduler& sch = faabric::scheduler::getScheduler();
         int32_t r = sch.awaitThreadResult(threadIdA);
-        REQUIRE(r == returnValueA);
+        assert(r == returnValueA);
     });
 
     std::thread tB([threadIdB, returnValueB] {
         faabric::scheduler::Scheduler& sch = faabric::scheduler::getScheduler();
         int32_t r = sch.awaitThreadResult(threadIdB);
-        REQUIRE(r == returnValueB);
+        assert(r == returnValueB);
     });
 
     if (tA.joinable()) {
diff --git a/thread-sanitizer-ignorelist.txt b/thread-sanitizer-ignorelist.txt
index 893bb6784..b8f2d8b3b 100644
--- a/thread-sanitizer-ignorelist.txt
+++ b/thread-sanitizer-ignorelist.txt
@@ -9,5 +9,5 @@ race:faabric::util::SystemConfig::*
 # Catch2 allocates in its signal handler, this prevents showing the wrong crash report
 signal:*
 
-# TODO: Remove: There's something weird going on with MPI allGather
-race:faabric::scheduler::MpiWorld::allGather
+# TODO: Remove: There's something weird going on with MPI code I don't understand
+race:faabric::scheduler::MpiWorld::*

From 0d896a59dc1c3959a4352bdaa794ed967f8c7448 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 19:15:44 +0000
Subject: [PATCH 37/48] Run TSan with maximum history size so it doesn't lose
 stack traces

---
 .github/workflows/sanitisers.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/sanitisers.yml b/.github/workflows/sanitisers.yml
index cf4b78071..3e49b8227 100644
--- a/.github/workflows/sanitisers.yml
+++ b/.github/workflows/sanitisers.yml
@@ -134,7 +134,7 @@ jobs:
         run: ./bin/faabric_tests > /dev/null
         working-directory: /build/faabric/static
         env:
-          TSAN_OPTIONS: "verbosity=1:halt_on_error=1:suppressions=/code/faabric/thread-sanitizer-ignorelist.txt"
+          TSAN_OPTIONS: "verbosity=1:halt_on_error=1:suppressions=/code/faabric/thread-sanitizer-ignorelist.txt:history_size=7"
 
   undefined-sanitiser:
     if: github.event.pull_request.draft == false

From 7e2e81f9cab2af062fd1c4a73cd2a3a7d2da5446 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 19:17:45 +0000
Subject: [PATCH 38/48] Missing shared lock in KV State

---
 src/state/State.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/state/State.cpp b/src/state/State.cpp
index c522ac758..38b38e97e 100644
--- a/src/state/State.cpp
+++ b/src/state/State.cpp
@@ -173,6 +173,7 @@ std::shared_ptr<StateKeyValue> State::doGetKV(const std::string& user,
 
 size_t State::getKVCount()
 {
+    faabric::util::SharedLock lock(mapMutex);
     return kvMap.size();
 }
 

From 7e0a55a645397b524ffa9dfd677afd14ea8bcac9 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 19:42:19 +0000
Subject: [PATCH 39/48] Disable two tests that break in TSan builds

---
 tests/test/transport/test_message_endpoint_client.cpp | 3 +++
 tests/test/transport/test_message_server.cpp          | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/tests/test/transport/test_message_endpoint_client.cpp b/tests/test/transport/test_message_endpoint_client.cpp
index 307639a65..09ce5abda 100644
--- a/tests/test/transport/test_message_endpoint_client.cpp
+++ b/tests/test/transport/test_message_endpoint_client.cpp
@@ -142,6 +142,8 @@ TEST_CASE_METHOD(SchedulerTestFixture,
     }
 }
 
+// This test hangs ThreadSanitizer
+#if !(defined(__has_feature) && __has_feature(thread_sanitizer))
 TEST_CASE_METHOD(SchedulerTestFixture,
                  "Test send/recv many messages from many clients",
                  "[transport]")
@@ -181,6 +183,7 @@ TEST_CASE_METHOD(SchedulerTestFixture,
         }
     }
 }
+#endif
 
 TEST_CASE_METHOD(SchedulerTestFixture,
                  "Test can't set invalid send/recv timeouts",
diff --git a/tests/test/transport/test_message_server.cpp b/tests/test/transport/test_message_server.cpp
index 65ffd35cd..7d798587a 100644
--- a/tests/test/transport/test_message_server.cpp
+++ b/tests/test/transport/test_message_server.cpp
@@ -177,6 +177,8 @@ TEST_CASE("Test sending response to client", "[transport]")
     server.stop();
 }
 
+// This test hangs ThreadSanitizer
+#if !(defined(__has_feature) && __has_feature(thread_sanitizer))
 TEST_CASE("Test multiple clients talking to one server", "[transport]")
 {
     EchoServer server;
@@ -215,6 +217,7 @@ TEST_CASE("Test multiple clients talking to one server", "[transport]")
 
     server.stop();
 }
+#endif
 
 TEST_CASE("Test client timeout on requests to valid server", "[transport]")
 {

From a6ae467e245b248d55f3caef60009fc47807c842 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 19:42:44 +0000
Subject: [PATCH 40/48] Stop gid generation from tripping TSan using relaxed
 atomics

---
 src/util/gids.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/util/gids.cpp b/src/util/gids.cpp
index 8e9312ecc..90d430823 100644
--- a/src/util/gids.cpp
+++ b/src/util/gids.cpp
@@ -7,7 +7,7 @@
 #include <faabric/util/random.h>
 
 static std::atomic_int counter = 0;
-static std::size_t gidKeyHash = 0;
+static std::atomic_size_t gidKeyHash = 0;
 static std::mutex gidMx;
 
 #define GID_LEN 20
@@ -15,7 +15,7 @@ static std::mutex gidMx;
 namespace faabric::util {
 unsigned int generateGid()
 {
-    if (gidKeyHash == 0) {
+    if (gidKeyHash.load(std::memory_order_relaxed) == 0) {
         faabric::util::UniqueLock lock(gidMx);
         if (gidKeyHash == 0) {
             // Generate random hash
@@ -24,7 +24,7 @@ unsigned int generateGid()
         }
     }
 
-    unsigned int intHash = gidKeyHash % INT32_MAX;
+    unsigned int intHash = gidKeyHash.load(std::memory_order_relaxed) % INT32_MAX;
     unsigned int result = intHash + counter.fetch_add(1);
     if (result) {
         return result;

From f7436b5f8d3af1ade485e8d6979deb7880843efd Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 19:44:53 +0000
Subject: [PATCH 41/48] Avoid racy destruction on FlagWaiter just like the
 Latch

---
 .../faabric/transport/PointToPointBroker.h    |  4 ++--
 include/faabric/util/locks.h                  |  3 ++-
 src/transport/PointToPointBroker.cpp          | 20 ++++++++++---------
 src/util/locks.cpp                            |  4 ++++
 tests/test/util/test_locks.cpp                | 12 +++++------
 5 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/include/faabric/transport/PointToPointBroker.h b/include/faabric/transport/PointToPointBroker.h
index a596a1fb5..998f2b28c 100644
--- a/include/faabric/transport/PointToPointBroker.h
+++ b/include/faabric/transport/PointToPointBroker.h
@@ -125,9 +125,9 @@ class PointToPointBroker
     std::unordered_map<int, std::set<int>> groupIdIdxsMap;
     std::unordered_map<std::string, std::string> mappings;
 
-    std::unordered_map<int, faabric::util::FlagWaiter> groupFlags;
+    std::unordered_map<int, std::shared_ptr<faabric::util::FlagWaiter>> groupFlags;
 
-    faabric::util::FlagWaiter& getGroupFlag(int groupId);
+    std::shared_ptr<faabric::util::FlagWaiter> getGroupFlag(int groupId);
 };
 
 PointToPointBroker& getPointToPointBroker();
diff --git a/include/faabric/util/locks.h b/include/faabric/util/locks.h
index 1ab7e9fad..3dc5e3488 100644
--- a/include/faabric/util/locks.h
+++ b/include/faabric/util/locks.h
@@ -4,6 +4,7 @@
 
 #include <atomic>
 #include <condition_variable>
+#include <memory>
 #include <mutex>
 #include <shared_mutex>
 
@@ -14,7 +15,7 @@ typedef std::unique_lock<std::mutex> UniqueLock;
 typedef std::unique_lock<std::shared_mutex> FullLock;
 typedef std::shared_lock<std::shared_mutex> SharedLock;
 
-class FlagWaiter
+class FlagWaiter : public std::enable_shared_from_this<FlagWaiter>
 {
   public:
     FlagWaiter(int timeoutMsIn = DEFAULT_FLAG_WAIT_MS);
diff --git a/src/transport/PointToPointBroker.cpp b/src/transport/PointToPointBroker.cpp
index 4d9b23fad..67bdac55c 100644
--- a/src/transport/PointToPointBroker.cpp
+++ b/src/transport/PointToPointBroker.cpp
@@ -432,7 +432,7 @@ PointToPointBroker::setUpLocalMappingsFromSchedulingDecision(
     SPDLOG_TRACE(
       "Enabling point-to-point mapping for {}:{}", decision.appId, groupId);
 
-    getGroupFlag(groupId).setFlag(true);
+    getGroupFlag(groupId)->setFlag(true);
 
     return hosts;
 }
@@ -470,27 +470,29 @@ void PointToPointBroker::setAndSendMappingsFromSchedulingDecision(
     }
 }
 
-faabric::util::FlagWaiter& PointToPointBroker::getGroupFlag(int groupId)
+std::shared_ptr<faabric::util::FlagWaiter> PointToPointBroker::getGroupFlag(
+  int groupId)
 {
+    faabric::util::SharedLock lock(brokerMutex);
     if (groupFlags.find(groupId) == groupFlags.end()) {
+        lock.unlock();
         faabric::util::FullLock lock(brokerMutex);
         if (groupFlags.find(groupId) == groupFlags.end()) {
-            return groupFlags[groupId];
+            return groupFlags
+              .emplace(groupId, std::make_shared<faabric::util::FlagWaiter>())
+              .first->second;
         }
     }
 
-    {
-        faabric::util::SharedLock lock(brokerMutex);
-        return groupFlags.at(groupId);
-    }
+    return groupFlags.at(groupId);
 }
 
 void PointToPointBroker::waitForMappingsOnThisHost(int groupId)
 {
-    faabric::util::FlagWaiter& waiter = getGroupFlag(groupId);
+    auto waiter = getGroupFlag(groupId);
 
     // Check if it's been enabled
-    waiter.waitOnFlag();
+    waiter->waitOnFlag();
 }
 
 std::set<int> PointToPointBroker::getIdxsRegisteredForGroup(int groupId)
diff --git a/src/util/locks.cpp b/src/util/locks.cpp
index 14064effa..a16079f22 100644
--- a/src/util/locks.cpp
+++ b/src/util/locks.cpp
@@ -8,6 +8,8 @@ FlagWaiter::FlagWaiter(int timeoutMsIn)
 
 void FlagWaiter::waitOnFlag()
 {
+    // Keep the this shared_ptr alive to prevent heap-use-after-free
+    std::shared_ptr<FlagWaiter> _keepMeAlive = shared_from_this();
     // Check
     if (flag.load()) {
         return;
@@ -26,6 +28,8 @@ void FlagWaiter::waitOnFlag()
 
 void FlagWaiter::setFlag(bool value)
 {
+    // Keep the this shared_ptr alive to prevent heap-use-after-free
+    std::shared_ptr<FlagWaiter> _keepMeAlive = shared_from_this();
     UniqueLock lock(flagMx);
     flag.store(value);
     cv.notify_all();
diff --git a/tests/test/util/test_locks.cpp b/tests/test/util/test_locks.cpp
index 839adbf85..1db7da17f 100644
--- a/tests/test/util/test_locks.cpp
+++ b/tests/test/util/test_locks.cpp
@@ -13,8 +13,8 @@ TEST_CASE("Test wait flag", "[util]")
 {
     int nThreads = 10;
 
-    FlagWaiter flagA;
-    FlagWaiter flagB;
+    auto flagA = std::make_shared<FlagWaiter>();
+    auto flagB = std::make_shared<FlagWaiter>();
 
     std::shared_ptr<Latch> latchA1 = Latch::create(nThreads + 1);
     std::shared_ptr<Latch> latchB1 = Latch::create(nThreads + 1);
@@ -36,14 +36,14 @@ TEST_CASE("Test wait flag", "[util]")
 
         threadsA.emplace_back([&flagA, &resultsA, &latchA1, &latchA2, i] {
             latchA1->wait();
-            flagA.waitOnFlag();
+            flagA->waitOnFlag();
             resultsA[i] = i;
             latchA2->wait();
         });
 
         threadsB.emplace_back([&flagB, &resultsB, &latchB1, &latchB2, i] {
             latchB1->wait();
-            flagB.waitOnFlag();
+            flagB->waitOnFlag();
             resultsB[i] = i;
             latchB2->wait();
         });
@@ -56,13 +56,13 @@ TEST_CASE("Test wait flag", "[util]")
     REQUIRE(resultsB == expectedUnset);
 
     // Set one flag, await latch and check
-    flagA.setFlag(true);
+    flagA->setFlag(true);
     latchA2->wait();
     REQUIRE(resultsA == expectedSet);
     REQUIRE(resultsB == expectedUnset);
 
     // Set other flag, await latch and check
-    flagB.setFlag(true);
+    flagB->setFlag(true);
     latchB2->wait();
     REQUIRE(resultsA == expectedSet);
     REQUIRE(resultsB == expectedSet);

From d77f4aeb87011e4a3717b63f702bdbd3eac0e00e Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 19:53:23 +0000
Subject: [PATCH 42/48] Missing lock on a queue push

---
 src/transport/PointToPointBroker.cpp | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/transport/PointToPointBroker.cpp b/src/transport/PointToPointBroker.cpp
index 67bdac55c..d09e5a4da 100644
--- a/src/transport/PointToPointBroker.cpp
+++ b/src/transport/PointToPointBroker.cpp
@@ -182,8 +182,11 @@ void PointToPointGroup::lock(int groupIdx, bool recursive)
             // Notify remote locker that they've acquired the lock
             notifyLocked(groupIdx);
         } else {
-            // Need to wait to get the lock
-            lockWaiters.push(groupIdx);
+            {
+                faabric::util::UniqueLock lock(mx);
+                // Need to wait to get the lock
+                lockWaiters.push(groupIdx);
+            }
 
             // Wait here if local, otherwise the remote end will pick up the
             // message
@@ -246,14 +249,14 @@ void PointToPointGroup::unlock(int groupIdx, bool recursive)
       ptpBroker.getHostForReceiver(groupId, POINT_TO_POINT_MASTER_IDX);
 
     if (host == conf.endpointHost) {
+        faabric::util::UniqueLock lock(mx);
+
         SPDLOG_TRACE("Group idx {} unlocking {} ({} waiters, recursive {})",
                      groupIdx,
                      groupId,
                      lockWaiters.size(),
                      recursive);
 
-        faabric::util::UniqueLock lock(mx);
-
         if (recursive) {
             recursiveLockOwners.pop();
 

From e8ac90c96590f5855aaf28c0c88e1ebecaa7d29c Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Fri, 26 Nov 2021 20:06:25 +0000
Subject: [PATCH 43/48] Fix formatting

---
 include/faabric/snapshot/SnapshotServer.h      | 3 ++-
 include/faabric/transport/PointToPointBroker.h | 3 ++-
 src/snapshot/SnapshotServer.cpp                | 3 ++-
 src/util/gids.cpp                              | 3 ++-
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/include/faabric/snapshot/SnapshotServer.h b/include/faabric/snapshot/SnapshotServer.h
index 4f95c4602..93a8cb6fa 100644
--- a/include/faabric/snapshot/SnapshotServer.h
+++ b/include/faabric/snapshot/SnapshotServer.h
@@ -14,7 +14,8 @@ class SnapshotServer final : public faabric::transport::MessageEndpointServer
   public:
     SnapshotServer();
 
-    // Returns how many diffs have been applied since started, useful for testing
+    // Returns how many diffs have been applied since started, useful for
+    // testing
     size_t diffsApplied() const;
 
   protected:
diff --git a/include/faabric/transport/PointToPointBroker.h b/include/faabric/transport/PointToPointBroker.h
index 998f2b28c..81d4f10b4 100644
--- a/include/faabric/transport/PointToPointBroker.h
+++ b/include/faabric/transport/PointToPointBroker.h
@@ -125,7 +125,8 @@ class PointToPointBroker
     std::unordered_map<int, std::set<int>> groupIdIdxsMap;
     std::unordered_map<std::string, std::string> mappings;
 
-    std::unordered_map<int, std::shared_ptr<faabric::util::FlagWaiter>> groupFlags;
+    std::unordered_map<int, std::shared_ptr<faabric::util::FlagWaiter>>
+      groupFlags;
 
     std::shared_ptr<faabric::util::FlagWaiter> getGroupFlag(int groupId);
 };
diff --git a/src/snapshot/SnapshotServer.cpp b/src/snapshot/SnapshotServer.cpp
index a14ce5aef..e864d9f6b 100644
--- a/src/snapshot/SnapshotServer.cpp
+++ b/src/snapshot/SnapshotServer.cpp
@@ -22,7 +22,8 @@ SnapshotServer::SnapshotServer()
   , broker(faabric::transport::getPointToPointBroker())
 {}
 
-size_t SnapshotServer::diffsApplied() const {
+size_t SnapshotServer::diffsApplied() const
+{
     return diffsAppliedCounter.load(std::memory_order_acquire);
 }
 
diff --git a/src/util/gids.cpp b/src/util/gids.cpp
index 90d430823..60082b2c4 100644
--- a/src/util/gids.cpp
+++ b/src/util/gids.cpp
@@ -24,7 +24,8 @@ unsigned int generateGid()
         }
     }
 
-    unsigned int intHash = gidKeyHash.load(std::memory_order_relaxed) % INT32_MAX;
+    unsigned int intHash =
+      gidKeyHash.load(std::memory_order_relaxed) % INT32_MAX;
     unsigned int result = intHash + counter.fetch_add(1);
     if (result) {
         return result;

From d1ac721a7b7efcb6adad99a393814255ad5233cb Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Mon, 29 Nov 2021 16:54:44 +0000
Subject: [PATCH 44/48] Address review comments

---
 .github/workflows/sanitisers.yml |   1 -
 CMakeLists.txt                   |   2 +
 src/redis/Redis.cpp              | 167 ++++++++++++++-----------------
 src/scheduler/Executor.cpp       |   2 +-
 src/scheduler/Scheduler.cpp      |   1 +
 src/state/StateKeyValue.cpp      |   8 +-
 src/util/testing.cpp             |   4 +-
 tasks/dev.py                     |   3 +-
 8 files changed, 87 insertions(+), 101 deletions(-)

diff --git a/.github/workflows/sanitisers.yml b/.github/workflows/sanitisers.yml
index 3e49b8227..4d105f68b 100644
--- a/.github/workflows/sanitisers.yml
+++ b/.github/workflows/sanitisers.yml
@@ -1,6 +1,5 @@
 name: Sanitiser Checks
 
-# on: workflow_dispatch
 on:
   push:
     branches: [master]
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7f361fa15..765ec9a58 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -33,6 +33,8 @@ elseif (FAABRIC_USE_SANITISER STREQUAL "Leak")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=leak")
 elseif (FAABRIC_USE_SANITISER STREQUAL "Memory")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=memory")
+elseif (NOT ((FAABRIC_USE_SANITISER STREQUAL "None") OR (NOT FAABRIC_USE_SANITISER)))
+    message(FATAL_ERROR "Invalid FAABRIC_USE_SANITISER setting: ${FAABRIC_USE_SANITISER}")
 endif()
 
 # Global include dir
diff --git a/src/redis/Redis.cpp b/src/redis/Redis.cpp
index de7fe81ad..5a9a4c3f2 100644
--- a/src/redis/Redis.cpp
+++ b/src/redis/Redis.cpp
@@ -1,5 +1,6 @@
 #include <faabric/redis/Redis.h>
 
+#include <cstdarg>
 #include <faabric/util/bytes.h>
 #include <faabric/util/config.h>
 #include <faabric/util/gids.h>
@@ -16,6 +17,16 @@ UniqueRedisReply wrapReply(redisReply* r)
     return UniqueRedisReply(r, &freeReplyObject);
 }
 
+UniqueRedisReply safeRedisCommand(redisContext* c, const char* format, ...)
+{
+    va_list args;
+    va_start(args, format);
+    UniqueRedisReply reply =
+      wrapReply((redisReply*)redisvCommand(c, format, args));
+    va_end(args);
+    return reply;
+}
+
 RedisInstance::RedisInstance(RedisRole roleIn)
   : role(roleIn)
 {
@@ -52,7 +63,7 @@ RedisInstance::RedisInstance(RedisRole roleIn)
 std::string RedisInstance::loadScript(redisContext* context,
                                       const std::string_view scriptBody)
 {
-    auto reply = (redisReply*)redisCommand(
+    auto reply = safeRedisCommand(
       context, "SCRIPT LOAD %b", scriptBody.data(), scriptBody.size());
 
     if (reply == nullptr) {
@@ -64,7 +75,6 @@ std::string RedisInstance::loadScript(redisContext* context,
     }
 
     std::string scriptSha = reply->str;
-    freeReplyObject(reply);
 
     return scriptSha;
 }
@@ -178,7 +188,7 @@ void Redis::ping()
 {
 
     SPDLOG_DEBUG("Pinging redis at {}", instance.hostname);
-    auto reply = wrapReply((redisReply*)redisCommand(context, "PING"));
+    auto reply = safeRedisCommand(context, "PING");
 
     std::string response(reply->str);
 
@@ -192,8 +202,7 @@ void Redis::ping()
 
 size_t Redis::strlen(const std::string& key)
 {
-    auto reply =
-      wrapReply((redisReply*)redisCommand(context, "STRLEN %s", key.c_str()));
+    auto reply = safeRedisCommand(context, "STRLEN %s", key.c_str());
 
     size_t result = reply->integer;
     return result;
@@ -201,26 +210,23 @@ size_t Redis::strlen(const std::string& key)
 
 void Redis::get(const std::string& key, uint8_t* buffer, size_t size)
 {
-    auto reply =
-      wrapReply((redisReply*)redisCommand(context, "GET %s", key.c_str()));
+    auto reply = safeRedisCommand(context, "GET %s", key.c_str());
 
     getBytesFromReply(key, *reply, buffer, size);
 }
 
 std::vector<uint8_t> Redis::get(const std::string& key)
 {
-    auto reply =
-      wrapReply((redisReply*)redisCommand(context, "GET %s", key.c_str()));
+    auto reply = safeRedisCommand(context, "GET %s", key.c_str());
 
-    const std::vector<uint8_t> replyBytes = getBytesFromReply(*reply);
+    std::vector<uint8_t> replyBytes = getBytesFromReply(*reply);
 
     return replyBytes;
 }
 
 long Redis::getCounter(const std::string& key)
 {
-    auto reply =
-      wrapReply((redisReply*)redisCommand(context, "GET %s", key.c_str()));
+    auto reply = safeRedisCommand(context, "GET %s", key.c_str());
 
     if (reply == nullptr || reply->type == REDIS_REPLY_NIL || reply->len == 0) {
         return 0;
@@ -231,8 +237,7 @@ long Redis::getCounter(const std::string& key)
 
 long Redis::incr(const std::string& key)
 {
-    auto reply =
-      wrapReply((redisReply*)redisCommand(context, "INCR %s", key.c_str()));
+    auto reply = safeRedisCommand(context, "INCR %s", key.c_str());
 
     long result = reply->integer;
     return result;
@@ -240,8 +245,7 @@ long Redis::incr(const std::string& key)
 
 long Redis::decr(const std::string& key)
 {
-    auto reply =
-      wrapReply((redisReply*)redisCommand(context, "DECR %s", key.c_str()));
+    auto reply = safeRedisCommand(context, "DECR %s", key.c_str());
     long result = reply->integer;
 
     return result;
@@ -251,8 +255,7 @@ long Redis::incrByLong(const std::string& key, long val)
 {
     // Format is NOT printf compatible contrary to what the docs say, hence %i
     // instead of %l.
-    auto reply = wrapReply(
-      (redisReply*)redisCommand(context, "INCRBY %s %i", key.c_str(), val));
+    auto reply = safeRedisCommand(context, "INCRBY %s %i", key.c_str(), val);
 
     long result = reply->integer;
 
@@ -263,8 +266,7 @@ long Redis::decrByLong(const std::string& key, long val)
 {
     // Format is NOT printf compatible contrary to what the docs say, hence %i
     // instead of %l.
-    auto reply = wrapReply(
-      (redisReply*)redisCommand(context, "DECRBY %s %i", key.c_str(), val));
+    auto reply = safeRedisCommand(context, "DECRBY %s %i", key.c_str(), val);
 
     long result = reply->integer;
 
@@ -278,8 +280,8 @@ void Redis::set(const std::string& key, const std::vector<uint8_t>& value)
 
 void Redis::set(const std::string& key, const uint8_t* value, size_t size)
 {
-    auto reply = wrapReply((redisReply*)redisCommand(
-      context, "SET %s %b", key.c_str(), value, size));
+    auto reply =
+      safeRedisCommand(context, "SET %s %b", key.c_str(), value, size);
 
     if (reply->type == REDIS_REPLY_ERROR) {
         SPDLOG_ERROR("Failed to SET {} - {}", key.c_str(), reply->str);
@@ -288,8 +290,7 @@ void Redis::set(const std::string& key, const uint8_t* value, size_t size)
 
 void Redis::del(const std::string& key)
 {
-    auto reply =
-      wrapReply((redisReply*)redisCommand(context, "DEL %s", key.c_str()));
+    safeRedisCommand(context, "DEL %s", key.c_str());
 }
 
 void Redis::setRange(const std::string& key,
@@ -297,8 +298,8 @@ void Redis::setRange(const std::string& key,
                      const uint8_t* value,
                      size_t size)
 {
-    auto reply = wrapReply((redisReply*)redisCommand(
-      context, "SETRANGE %s %li %b", key.c_str(), offset, value, size));
+    auto reply = safeRedisCommand(
+      context, "SETRANGE %s %li %b", key.c_str(), offset, value, size);
 
     if (reply->type != REDIS_REPLY_INTEGER) {
         SPDLOG_ERROR("Failed SETRANGE {}", key);
@@ -333,8 +334,8 @@ void Redis::flushPipeline(long pipelineLength)
 
 void Redis::sadd(const std::string& key, const std::string& value)
 {
-    auto reply = wrapReply((redisReply*)redisCommand(
-      context, "SADD %s %s", key.c_str(), value.c_str()));
+    auto reply =
+      safeRedisCommand(context, "SADD %s %s", key.c_str(), value.c_str());
     if (reply->type == REDIS_REPLY_ERROR) {
         SPDLOG_ERROR("Failed to add {} to set {}", value, key);
         throw std::runtime_error("Failed to add element to set");
@@ -343,14 +344,13 @@ void Redis::sadd(const std::string& key, const std::string& value)
 
 void Redis::srem(const std::string& key, const std::string& value)
 {
-    auto reply = wrapReply((redisReply*)redisCommand(
-      context, "SREM %s %s", key.c_str(), value.c_str()));
+
+    safeRedisCommand(context, "SREM %s %s", key.c_str(), value.c_str());
 }
 
 long Redis::scard(const std::string& key)
 {
-    auto reply =
-      wrapReply((redisReply*)redisCommand(context, "SCARD %s", key.c_str()));
+    auto reply = safeRedisCommand(context, "SCARD %s", key.c_str());
 
     long res = reply->integer;
 
@@ -359,8 +359,8 @@ long Redis::scard(const std::string& key)
 
 bool Redis::sismember(const std::string& key, const std::string& value)
 {
-    auto reply = wrapReply((redisReply*)redisCommand(
-      context, "SISMEMBER %s %s", key.c_str(), value.c_str()));
+    auto reply =
+      safeRedisCommand(context, "SISMEMBER %s %s", key.c_str(), value.c_str());
 
     bool res = reply->integer == 1;
 
@@ -369,8 +369,7 @@ bool Redis::sismember(const std::string& key, const std::string& value)
 
 std::string Redis::srandmember(const std::string& key)
 {
-    auto reply = wrapReply(
-      (redisReply*)redisCommand(context, "SRANDMEMBER %s", key.c_str()));
+    auto reply = safeRedisCommand(context, "SRANDMEMBER %s", key.c_str());
 
     std::string res;
     if (reply->len > 0) {
@@ -392,8 +391,7 @@ std::set<std::string> extractStringSetFromReply(const redisReply& reply)
 
 std::set<std::string> Redis::smembers(const std::string& key)
 {
-    auto reply =
-      wrapReply((redisReply*)redisCommand(context, "SMEMBERS %s", key.c_str()));
+    auto reply = safeRedisCommand(context, "SMEMBERS %s", key.c_str());
     std::set<std::string> result = extractStringSetFromReply(*reply);
 
     return result;
@@ -402,8 +400,8 @@ std::set<std::string> Redis::smembers(const std::string& key)
 std::set<std::string> Redis::sinter(const std::string& keyA,
                                     const std::string& keyB)
 {
-    auto reply = wrapReply((redisReply*)redisCommand(
-      context, "SINTER %s %s", keyA.c_str(), keyB.c_str()));
+    auto reply =
+      safeRedisCommand(context, "SINTER %s %s", keyA.c_str(), keyB.c_str());
     std::set<std::string> result = extractStringSetFromReply(*reply);
 
     return result;
@@ -412,8 +410,8 @@ std::set<std::string> Redis::sinter(const std::string& keyA,
 std::set<std::string> Redis::sdiff(const std::string& keyA,
                                    const std::string& keyB)
 {
-    auto reply = wrapReply((redisReply*)redisCommand(
-      context, "SDIFF %s %s", keyA.c_str(), keyB.c_str()));
+    auto reply =
+      safeRedisCommand(context, "SDIFF %s %s", keyA.c_str(), keyB.c_str());
     std::set<std::string> result = extractStringSetFromReply(*reply);
 
     return result;
@@ -421,8 +419,7 @@ std::set<std::string> Redis::sdiff(const std::string& keyA,
 
 int Redis::lpushLong(const std::string& key, long value)
 {
-    auto reply = wrapReply(
-      (redisReply*)redisCommand(context, "LPUSH %s %i", key.c_str(), value));
+    auto reply = safeRedisCommand(context, "LPUSH %s %i", key.c_str(), value);
     long long int result = reply->integer;
 
     return result;
@@ -430,21 +427,19 @@ int Redis::lpushLong(const std::string& key, long value)
 
 int Redis::rpushLong(const std::string& key, long value)
 {
-    auto reply = wrapReply(
-      (redisReply*)redisCommand(context, "RPUSH %s %i", key.c_str(), value));
+    auto reply = safeRedisCommand(context, "RPUSH %s %i", key.c_str(), value);
     long long int result = reply->integer;
     return result;
 }
 
 void Redis::flushAll()
 {
-    auto reply = wrapReply((redisReply*)redisCommand(context, "FLUSHALL"));
+    safeRedisCommand(context, "FLUSHALL");
 }
 
 long Redis::listLength(const std::string& queueName)
 {
-    auto reply = wrapReply(
-      (redisReply*)redisCommand(context, "LLEN %s", queueName.c_str()));
+    auto reply = safeRedisCommand(context, "LLEN %s", queueName.c_str());
 
     if (reply == nullptr || reply->type == REDIS_REPLY_NIL) {
         return 0;
@@ -457,8 +452,7 @@ long Redis::listLength(const std::string& queueName)
 
 long Redis::getTtl(const std::string& key)
 {
-    auto reply =
-      wrapReply((redisReply*)redisCommand(context, "TTL %s", key.c_str()));
+    auto reply = safeRedisCommand(context, "TTL %s", key.c_str());
 
     long ttl = reply->integer;
 
@@ -467,8 +461,7 @@ long Redis::getTtl(const std::string& key)
 
 void Redis::expire(const std::string& key, long expiry)
 {
-    auto reply = wrapReply(
-      (redisReply*)redisCommand(context, "EXPIRE %s %d", key.c_str(), expiry));
+    safeRedisCommand(context, "EXPIRE %s %d", key.c_str(), expiry);
 }
 
 void Redis::refresh()
@@ -492,8 +485,8 @@ void Redis::getRange(const std::string& key,
           " too long for buffer length " + std::to_string(bufferLen));
     }
 
-    auto reply = wrapReply((redisReply*)redisCommand(
-      context, "GETRANGE %s %li %li", key.c_str(), start, end));
+    auto reply =
+      safeRedisCommand(context, "GETRANGE %s %li %li", key.c_str(), start, end);
 
     // Importantly getrange is inclusive so we need to be checking the buffer
     // length
@@ -529,12 +522,11 @@ void Redis::releaseLock(const std::string& key, uint32_t lockId)
 void Redis::delIfEq(const std::string& key, uint32_t value)
 {
     // Invoke the script
-    auto reply =
-      wrapReply((redisReply*)redisCommand(context,
-                                          "EVALSHA %s 1 %s %i",
-                                          instance.delifeqSha.c_str(),
-                                          key.c_str(),
-                                          value));
+    auto reply = safeRedisCommand(context,
+                                  "EVALSHA %s 1 %s %i",
+                                  instance.delifeqSha.c_str(),
+                                  key.c_str(),
+                                  value);
 
     extractScriptResult(*reply);
 }
@@ -545,8 +537,8 @@ bool Redis::setnxex(const std::string& key, long value, int expirySeconds)
     // We use NX to say "set if not exists" and ex to specify the expiry of this
     // key/value This is useful in implementing locks. We only use longs as
     // values to keep things simple
-    auto reply = wrapReply((redisReply*)redisCommand(
-      context, "SET %s %i EX %i NX", key.c_str(), value, expirySeconds));
+    auto reply = safeRedisCommand(
+      context, "SET %s %i EX %i NX", key.c_str(), value, expirySeconds);
 
     bool success = false;
     if (reply->type == REDIS_REPLY_ERROR) {
@@ -560,8 +552,7 @@ bool Redis::setnxex(const std::string& key, long value, int expirySeconds)
 
 long Redis::getLong(const std::string& key)
 {
-    auto reply =
-      wrapReply((redisReply*)redisCommand(context, "GET %s", key.c_str()));
+    auto reply = safeRedisCommand(context, "GET %s", key.c_str());
 
     long res = getLongFromReply(*reply);
 
@@ -572,8 +563,7 @@ void Redis::setLong(const std::string& key, long value)
 {
     // Format is NOT printf compatible contrary to what the docs say, hence %i
     // instead of %l.
-    auto reply = wrapReply(
-      (redisReply*)redisCommand(context, "SET %s %i", key.c_str(), value));
+    safeRedisCommand(context, "SET %s %i", key.c_str(), value);
 }
 
 /**
@@ -582,8 +572,7 @@ void Redis::setLong(const std::string& key, long value)
 
 void Redis::enqueue(const std::string& queueName, const std::string& value)
 {
-    auto reply = wrapReply((redisReply*)redisCommand(
-      context, "RPUSH %s %s", queueName.c_str(), value.c_str()));
+    safeRedisCommand(context, "RPUSH %s %s", queueName.c_str(), value.c_str());
 }
 
 void Redis::enqueueBytes(const std::string& queueName,
@@ -599,7 +588,7 @@ void Redis::enqueueBytes(const std::string& queueName,
     // NOTE: Here we must be careful with the input and specify bytes rather
     // than a string otherwise an encoded false boolean can be treated as a
     // string terminator
-    auto reply = (redisReply*)redisCommand(
+    auto reply = safeRedisCommand(
       context, "RPUSH %s %b", queueName.c_str(), buffer, bufferLen);
 
     if (reply->type != REDIS_REPLY_INTEGER) {
@@ -609,8 +598,6 @@ void Redis::enqueueBytes(const std::string& queueName,
         throw std::runtime_error("Failed to enqueue bytes. Length = " +
                                  std::to_string(reply->integer));
     }
-
-    freeReplyObject(reply);
 }
 
 UniqueRedisReply Redis::dequeueBase(const std::string& queueName, int timeoutMs)
@@ -625,12 +612,11 @@ UniqueRedisReply Redis::dequeueBase(const std::string& queueName, int timeoutMs)
         // Floor to one second
         int timeoutSecs = std::max(timeoutMs / 1000, 1);
 
-        reply = wrapReply((redisReply*)redisCommand(
-          context, "BLPOP %s %d", queueName.c_str(), timeoutSecs));
+        reply = safeRedisCommand(
+          context, "BLPOP %s %d", queueName.c_str(), timeoutSecs);
     } else {
         // LPOP is non-blocking
-        reply = wrapReply(
-          (redisReply*)redisCommand(context, "LPOP %s", queueName.c_str()));
+        reply = safeRedisCommand(context, "LPOP %s", queueName.c_str());
     }
 
     // Check if we got anything
@@ -686,8 +672,8 @@ void Redis::dequeueMultiple(const std::string& queueName,
                             long nElems)
 {
     // NOTE - much like other range stuff with redis, this is *INCLUSIVE*
-    auto reply = wrapReply((redisReply*)redisCommand(
-      context, "LRANGE %s 0 %i", queueName.c_str(), nElems - 1));
+    auto reply = safeRedisCommand(
+      context, "LRANGE %s 0 %i", queueName.c_str(), nElems - 1);
 
     long offset = 0;
     for (size_t i = 0; i < reply->elements; i++) {
@@ -744,7 +730,7 @@ size_t Redis::dequeueBytes(const std::string& queueName,
           ")");
     }
 
-    memcpy(buffer, resultBytes, resultLen);
+    ::memcpy(buffer, resultBytes, resultLen);
     return resultLen;
 }
 
@@ -752,18 +738,17 @@ void Redis::publishSchedulerResult(const std::string& key,
                                    const std::string& status_key,
                                    const std::vector<uint8_t>& result)
 {
-    auto reply =
-      wrapReply((redisReply*)redisCommand(context,
-                                          "EVALSHA %s 2 %s %s %b %d %d",
-                                          instance.schedPublishSha.c_str(),
-                                          // keys
-                                          key.c_str(),
-                                          status_key.c_str(),
-                                          // argv
-                                          result.data(),
-                                          result.size(),
-                                          RESULT_KEY_EXPIRY,
-                                          STATUS_KEY_EXPIRY));
+    auto reply = safeRedisCommand(context,
+                                  "EVALSHA %s 2 %s %s %b %d %d",
+                                  instance.schedPublishSha.c_str(),
+                                  // keys
+                                  key.c_str(),
+                                  status_key.c_str(),
+                                  // argv
+                                  result.data(),
+                                  result.size(),
+                                  RESULT_KEY_EXPIRY,
+                                  STATUS_KEY_EXPIRY);
     extractScriptResult(*reply);
 }
 }
diff --git a/src/scheduler/Executor.cpp b/src/scheduler/Executor.cpp
index edcc70108..a63360410 100644
--- a/src/scheduler/Executor.cpp
+++ b/src/scheduler/Executor.cpp
@@ -67,7 +67,7 @@ void Executor::finish()
           ExecutorTask(POOL_SHUTDOWN, nullptr, nullptr, false, false));
 
         faabric::util::UniqueLock threadsLock(threadsMutex);
-        // copy shared_ptr to avoid racing
+        // Copy shared_ptr to avoid racing
         auto thread = threadPoolThreads.at(i);
         // If already killed, move to the next thread
         if (thread == nullptr) {
diff --git a/src/scheduler/Scheduler.cpp b/src/scheduler/Scheduler.cpp
index 01f26c3f3..49af9dd80 100644
--- a/src/scheduler/Scheduler.cpp
+++ b/src/scheduler/Scheduler.cpp
@@ -117,6 +117,7 @@ void Scheduler::reset()
         e->finish();
     }
     deadExecutorsList.clear();
+
     faabric::util::FullLock lock(mx);
     executors.clear();
     deadExecutors.clear();
diff --git a/src/state/StateKeyValue.cpp b/src/state/StateKeyValue.cpp
index 77ebd3246..8ad4ece49 100644
--- a/src/state/StateKeyValue.cpp
+++ b/src/state/StateKeyValue.cpp
@@ -222,7 +222,7 @@ void StateKeyValue::flagDirty()
 void StateKeyValue::zeroDirtyMask()
 {
     checkSizeConfigured();
-    memset(dirtyMask.get(), 0, valueSize);
+    ::memset(dirtyMask.get(), 0, valueSize);
 }
 
 void StateKeyValue::flagChunkDirty(long offset, long len)
@@ -236,7 +236,7 @@ void StateKeyValue::flagChunkDirty(long offset, long len)
 void StateKeyValue::markDirtyChunk(long offset, long len)
 {
     isDirty |= true;
-    memset(dirtyMask.get() + offset, ONES_BITMASK, len);
+    ::memset(dirtyMask.get() + offset, ONES_BITMASK, len);
 }
 
 size_t StateKeyValue::size() const
@@ -504,7 +504,7 @@ void StateKeyValue::doPullChunk(bool lazy, long offset, size_t length)
     pullChunkFromRemote(offset, length);
 
     // Mark the chunk as pulled
-    memset(pulledMask.get() + offset, ONES_BITMASK, length);
+    ::memset(pulledMask.get() + offset, ONES_BITMASK, length);
 }
 
 void StateKeyValue::doPushPartial(const uint8_t* dirtyMaskBytes)
@@ -528,7 +528,7 @@ void StateKeyValue::doPushPartial(const uint8_t* dirtyMaskBytes)
     const std::vector<StateChunk>& chunks = getDirtyChunks(dirtyMaskBytes);
 
     // Zero the mask now that we're finished with it
-    memset((void*)dirtyMaskBytes, 0, valueSize);
+    ::memset((void*)dirtyMaskBytes, 0, valueSize);
 
     // Push
     pushPartialToRemote(chunks);
diff --git a/src/util/testing.cpp b/src/util/testing.cpp
index b0036885b..7328635a0 100644
--- a/src/util/testing.cpp
+++ b/src/util/testing.cpp
@@ -3,8 +3,8 @@
 #include <atomic>
 
 namespace faabric::util {
-static std::atomic_bool testMode = false;
-static std::atomic_bool mockMode = false;
+static std::atomic<bool> testMode = false;
+static std::atomic<bool> mockMode = false;
 
 void setTestMode(bool val)
 {
diff --git a/tasks/dev.py b/tasks/dev.py
index 81c6827d6..c70f53d89 100644
--- a/tasks/dev.py
+++ b/tasks/dev.py
@@ -89,8 +89,7 @@ def sanitise(ctx, mode, noclean=False, shared=False):
     mode_list = ["Address", "Thread", "Undefined", "Leak", "Memory"]
     if mode not in mode_list:
         print("Unrecognised sanitiser mode: {}".format(mode))
-        print("Sanitisier mode must be one in {}".format(mode_list))
-        return
+        raise RuntimeError("Sanitisier mode must be one in {}".format(mode_list))
 
     cmake(
         ctx,

From 279c9a805e09919c691f8081d4b327a292dc43ea Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Mon, 29 Nov 2021 17:01:16 +0000
Subject: [PATCH 45/48] Remove mode checking from python as it's already done
 by cmake

---
 tasks/dev.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tasks/dev.py b/tasks/dev.py
index c70f53d89..65b833a7d 100644
--- a/tasks/dev.py
+++ b/tasks/dev.py
@@ -86,10 +86,6 @@ def sanitise(ctx, mode, noclean=False, shared=False):
     """
     Build the tests with different sanitisers
     """
-    mode_list = ["Address", "Thread", "Undefined", "Leak", "Memory"]
-    if mode not in mode_list:
-        print("Unrecognised sanitiser mode: {}".format(mode))
-        raise RuntimeError("Sanitisier mode must be one in {}".format(mode_list))
 
     cmake(
         ctx,

From eb58bbaa4b9408f0628cad156f269f472f9ee69d Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Mon, 29 Nov 2021 17:07:24 +0000
Subject: [PATCH 46/48] Fix gha cache key, add restore-keys to fall back on
 cache misses and try a different key

---
 .github/workflows/sanitisers.yml | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/sanitisers.yml b/.github/workflows/sanitisers.yml
index 4d105f68b..96f5d56e6 100644
--- a/.github/workflows/sanitisers.yml
+++ b/.github/workflows/sanitisers.yml
@@ -41,7 +41,10 @@ jobs:
       - uses: actions/cache@v2
         with:
           path: '~/.conan'
-          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('cmake/ExternalProjects.cmake') }}
+          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('/code/faabric/cmake/ExternalProjects.cmake') }}
+          restore-keys: |
+            ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-
+            ${{ runner.os }}-
       # --- Build dependencies
       - name: "Build dependencies to be shared by all sanitiser runs"
         run: inv dev.cmake -b Debug
@@ -80,7 +83,10 @@ jobs:
       - uses: actions/cache@v2
         with:
           path: '~/.conan'
-          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('cmake/ExternalProjects.cmake') }}
+          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('/code/faabric/cmake/ExternalProjects.cmake') }}
+          restore-keys: |
+            ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-
+            ${{ runner.os }}-
       # --- Build with thread sanitising
       - name: "Build tests with address sanitising"
         run: inv dev.sanitise Address --noclean
@@ -124,7 +130,10 @@ jobs:
       - uses: actions/cache@v2
         with:
           path: '~/.conan'
-          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('cmake/ExternalProjects.cmake') }}
+          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('/code/faabric/cmake/ExternalProjects.cmake') }}
+          restore-keys: |
+            ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-
+            ${{ runner.os }}-
       # --- Build with thread sanitising
       - name: "Build tests with thread sanitising"
         run: inv dev.sanitise Thread --noclean
@@ -169,7 +178,10 @@ jobs:
       - uses: actions/cache@v2
         with:
           path: '~/.conan'
-          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('cmake/ExternalProjects.cmake') }}
+          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('/code/faabric/cmake/ExternalProjects.cmake') }}
+          restore-keys: |
+            ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-
+            ${{ runner.os }}-
       # --- Build with thread sanitising
       - name: "Build tests with undefined sanitising"
         run: inv dev.sanitise Undefined --noclean
@@ -214,7 +226,10 @@ jobs:
       - uses: actions/cache@v2
         with:
           path: '~/.conan'
-          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('cmake/ExternalProjects.cmake') }}
+          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('/code/faabric/cmake/ExternalProjects.cmake') }}
+          restore-keys: |
+            ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-
+            ${{ runner.os }}-
       # --- Build with thread sanitising
       - name: "Build tests with leak sanitising"
         run: inv dev.sanitise Leak --noclean

From fd98afeb467355603d775f2c51a7f65649d3033c Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Mon, 29 Nov 2021 17:16:17 +0000
Subject: [PATCH 47/48] Another attempt at fixing checksum hashes, this time
 use bash instead of GHA builtins

---
 .github/workflows/sanitisers.yml | 35 +++++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/sanitisers.yml b/.github/workflows/sanitisers.yml
index 96f5d56e6..5d4d0abf3 100644
--- a/.github/workflows/sanitisers.yml
+++ b/.github/workflows/sanitisers.yml
@@ -38,10 +38,15 @@ jobs:
         run: |
           echo "::set-output name=conan-version::$(conan --version | tr -d '[:alpha:] ')"
         shell: bash
+      - name: Hash ExternalProjects
+        id: hash-external
+        run: |
+          echo "::set-output name=hash-external::$(sha256sum cmake/ExternalProjects.cmake | cut '-d ' -f 1)"
+        shell: bash
       - uses: actions/cache@v2
         with:
           path: '~/.conan'
-          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('/code/faabric/cmake/ExternalProjects.cmake') }}
+          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ steps.hash-external.outputs.hash-external }}
           restore-keys: |
             ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-
             ${{ runner.os }}-
@@ -80,10 +85,15 @@ jobs:
         run: |
           echo "::set-output name=conan-version::$(conan --version | tr -d '[:alpha:] ')"
         shell: bash
+      - name: Hash ExternalProjects
+        id: hash-external
+        run: |
+          echo "::set-output name=hash-external::$(sha256sum cmake/ExternalProjects.cmake | cut '-d ' -f 1)"
+        shell: bash
       - uses: actions/cache@v2
         with:
           path: '~/.conan'
-          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('/code/faabric/cmake/ExternalProjects.cmake') }}
+          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ steps.hash-external.outputs.hash-external }}
           restore-keys: |
             ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-
             ${{ runner.os }}-
@@ -127,10 +137,15 @@ jobs:
         run: |
           echo "::set-output name=conan-version::$(conan --version | tr -d '[:alpha:] ')"
         shell: bash
+      - name: Hash ExternalProjects
+        id: hash-external
+        run: |
+          echo "::set-output name=hash-external::$(sha256sum cmake/ExternalProjects.cmake | cut '-d ' -f 1)"
+        shell: bash
       - uses: actions/cache@v2
         with:
           path: '~/.conan'
-          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('/code/faabric/cmake/ExternalProjects.cmake') }}
+          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ steps.hash-external.outputs.hash-external }}
           restore-keys: |
             ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-
             ${{ runner.os }}-
@@ -175,10 +190,15 @@ jobs:
         run: |
           echo "::set-output name=conan-version::$(conan --version | tr -d '[:alpha:] ')"
         shell: bash
+      - name: Hash ExternalProjects
+        id: hash-external
+        run: |
+          echo "::set-output name=hash-external::$(sha256sum cmake/ExternalProjects.cmake | cut '-d ' -f 1)"
+        shell: bash
       - uses: actions/cache@v2
         with:
           path: '~/.conan'
-          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('/code/faabric/cmake/ExternalProjects.cmake') }}
+          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ steps.hash-external.outputs.hash-external }}
           restore-keys: |
             ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-
             ${{ runner.os }}-
@@ -223,10 +243,15 @@ jobs:
         run: |
           echo "::set-output name=conan-version::$(conan --version | tr -d '[:alpha:] ')"
         shell: bash
+      - name: Hash ExternalProjects
+        id: hash-external
+        run: |
+          echo "::set-output name=hash-external::$(sha256sum cmake/ExternalProjects.cmake | cut '-d ' -f 1)"
+        shell: bash
       - uses: actions/cache@v2
         with:
           path: '~/.conan'
-          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ hashFiles('/code/faabric/cmake/ExternalProjects.cmake') }}
+          key: ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-${{ steps.hash-external.outputs.hash-external }}
           restore-keys: |
             ${{ runner.os }}-${{ steps.get-conan-version.outputs.conan-version }}-
             ${{ runner.os }}-

From d5341e843c85de76547d33bad57859bc5eed85e8 Mon Sep 17 00:00:00 2001
From: Jakub Szewczyk <git@kubasz.xyz>
Date: Tue, 30 Nov 2021 10:11:55 +0000
Subject: [PATCH 48/48] Show full test and sanitiser logs, it halts on errors
 so still should be readable

---
 .github/workflows/sanitisers.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/sanitisers.yml b/.github/workflows/sanitisers.yml
index 5d4d0abf3..ea646f44b 100644
--- a/.github/workflows/sanitisers.yml
+++ b/.github/workflows/sanitisers.yml
@@ -101,7 +101,7 @@ jobs:
       - name: "Build tests with address sanitising"
         run: inv dev.sanitise Address --noclean
       - name: "Run tests with address sanitising on and print to stderr to file"
-        run: ./bin/faabric_tests > /dev/null
+        run: ./bin/faabric_tests
         working-directory: /build/faabric/static
         env:
           ASAN_OPTIONS: "verbosity=1:halt_on_error=1"
@@ -154,7 +154,7 @@ jobs:
         run: inv dev.sanitise Thread --noclean
       # --- Tests ---
       - name: "Run tests with thread sanitising on and print stderr to file"
-        run: ./bin/faabric_tests > /dev/null
+        run: ./bin/faabric_tests
         working-directory: /build/faabric/static
         env:
           TSAN_OPTIONS: "verbosity=1:halt_on_error=1:suppressions=/code/faabric/thread-sanitizer-ignorelist.txt:history_size=7"
@@ -207,7 +207,7 @@ jobs:
         run: inv dev.sanitise Undefined --noclean
       # --- Tests ---
       - name: "Run tests with undefined sanitising on and print stderr to file"
-        run: ./bin/faabric_tests > /dev/null
+        run: ./bin/faabric_tests
         working-directory: /build/faabric/static
         env:
           UBSAN_OPTIONS: "print_stacktrace=1:halt_on_error=1"
@@ -260,5 +260,5 @@ jobs:
         run: inv dev.sanitise Leak --noclean
       # --- Tests ---
       - name: "Run tests with leak sanitising on and print stderr to file"
-        run: ./bin/faabric_tests > /dev/null
+        run: ./bin/faabric_tests
         working-directory: /build/faabric/static