forked from trishume/telefork
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
progress on cuda-checkpoint, blocked on restoring with the same PID: N…
- Loading branch information
1 parent
94f5103
commit 26e9e70
Showing
8 changed files
with
201 additions
and
57 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
use std::env; | ||
use std::fs; | ||
use std::path::Path; | ||
use std::process::Command; | ||
|
||
/// Build-script entry point: downloads the prebuilt NVIDIA `cuda-checkpoint`
/// binary into `OUT_DIR` and marks it executable.
///
/// # Panics
///
/// Panics if `OUT_DIR` is unset, if `curl` cannot be spawned, if the download
/// fails (including HTTP error responses), or if the file permissions cannot
/// be read or updated.
fn main() {
    // Define the URL of the file to download
    let url = "https://github.com/NVIDIA/cuda-checkpoint/blob/main/bin/x86_64_Linux/cuda-checkpoint?raw=true";
    let filename = "cuda-checkpoint";

    // Determine the output directory for the binary
    let out_dir = env::var("OUT_DIR").expect("OUT_DIR environment variable is not set");
    let dest_path = Path::new(&out_dir).join(filename);

    // Download the binary using curl
    let status = Command::new("curl")
        // Without --fail, curl exits 0 on HTTP 4xx/5xx and writes the error
        // page to the output file, which would then be treated as the binary.
        .arg("--fail")
        .arg("-L") // Follow redirects
        .arg("-o")
        .arg(&dest_path)
        .arg(url)
        .status()
        .expect("Failed to execute curl");

    if !status.success() {
        panic!("Failed to download cuda-checkpoint");
    }

    // Make the binary executable
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let mut perms = fs::metadata(&dest_path)
            .expect("Failed to retrieve metadata")
            .permissions();
        perms.set_mode(0o755);
        fs::set_permissions(&dest_path, perms).expect("Failed to set permissions");
    }

    // Tell cargo when this build script needs to be re-run
    println!("cargo:rerun-if-changed=build.rs");
    println!("cargo:rerun-if-env-changed=OUT_DIR");
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#include <stdio.h> | ||
#include <unistd.h> | ||
#include <cuda_runtime.h> | ||
|
||
// Device-resident counter; starts at 100.
__device__ int counter = 100;

// Kernel: adds one to the device-side `counter`.
__global__ void increment()
{
    counter += 1;
}
|
||
// Aborts the program when a CUDA runtime call did not succeed.
//
// result: status code returned by the CUDA call being checked.
// msg:    short description of the operation, included in the error output.
//
// On failure, prints the description and the CUDA error string to stderr and
// exits with status 1; on success, returns without doing anything.
void checkCuda(cudaError_t result, const char *msg) {
    if (result == cudaSuccess) {
        return;
    }

    fprintf(stderr, "CUDA Error: %s - %s\n", msg, cudaGetErrorString(result));
    exit(1);
}
|
||
// Test program: once per second, increments a device-side counter with a
// kernel, copies it back to the host, and prints it. Runs until killed or
// until a CUDA call fails (checkCuda exits with status 1).
int main(void)
{
    // Initialize CUDA
    checkCuda(cudaFree(0), "Initializing CUDA");

    // Initialize counter to 100 on the device
    int initialCounter = 100;
    checkCuda(cudaMemcpyToSymbol(counter, &initialCounter, sizeof(int)), "Initializing counter");

    while (true) {
        int hCounter = 0;

        // Launch the increment kernel
        increment<<<1, 1>>>();
        // A failed launch is reported through cudaGetLastError, so check it
        // explicitly before waiting for the kernel to finish.
        checkCuda(cudaGetLastError(), "Kernel launch");
        checkCuda(cudaDeviceSynchronize(), "Kernel execution");

        // Copy the counter from device to host
        checkCuda(cudaMemcpyFromSymbol(&hCounter, counter, sizeof(counter)), "Copying counter to host");

        // Print the current counter value
        printf("%d\n", hCounter);
        // Flush so each value is visible immediately even when stdout is a
        // pipe or file (fully buffered); this loop never exits normally.
        fflush(stdout);

        // Wait for 1 second
        sleep(1);
    }

    return 0;
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
use memfd_exec::{MemFdExecutable, Stdio}; | ||
|
||
/// Returns the cuda-checkpoint executable as an array of bytes. | ||
/// | ||
/// The cuda-checkpoint executable is used to checkpoint/restore CUDA state. | ||
#[cfg(target_os = "linux")] | ||
fn get_cuda_checkpoint_binary() -> &'static [u8] { | ||
include_bytes!(concat!(env!("OUT_DIR"), "/cuda-checkpoint")) | ||
} | ||
|
||
/// Run cuda-checkpoint. | ||
/// Ref: https://github.com/NVIDIA/cuda-checkpoint | ||
pub fn checkpoint(pid: i32) -> Result<(), Box<dyn std::error::Error>> { | ||
// The `MemFdExecutable` struct is at near feature-parity with `std::process::Command`, | ||
// so you can use it in the same way. The only difference is that you must provide the | ||
// executable contents as a `Vec<u8>` as well as telling it the argv[0] to use. | ||
let c = MemFdExecutable::new("cuda-checkpoint", get_cuda_checkpoint_binary()) | ||
.arg("--toggle") | ||
.args(["--pid", &pid.to_string().as_str()]) | ||
// We'll capture the stdout of the process, so we need to set up a pipe. | ||
.stdout(Stdio::piped()) | ||
// Spawn the process as a forked child | ||
.spawn()?; | ||
|
||
// Get the output and status code of the process (this will block until the process | ||
// exits) | ||
let output = c.wait_with_output()?; | ||
assert!(output.status.into_raw() == 0); | ||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters