Works with latest stable Zig 0.13.0.
This library helps you interact with NVIDIA GPUs from Zig. It provides a high-level interface for communicating with the GPU. It can detect the CUDA installation and link it to a project's binary on Linux/macOS. See the customization note at the end of this document to provide the CUDA path manually.
- Memory Allocation in GPU with defined size
- Copying data from host to GPU and vice versa
- Compiling (`.cu`) and loading (`.ptx`) kernels, both from file and text
- Running kernels with grid/blocks/threads configuration
- Generate random numbers
Check test.zig file for code samples.
Download the library and save its path in your `build.zig.zon` file by running:
zig fetch --save https://github.com/mkashirin/cudaz/tarball/master
Add the Cudaz module to your project's `build.zig` file so that it is linked into your project's binary.
// build.zig
const std = @import("std");
/// Build script entry point for a project that consumes the `cudaz` package.
///
/// Declares the `main` executable from `src/main.zig`, imports the `cudaz`
/// module from the dependency listed in `build.zig.zon`, links libc and the
/// `cuda`/`nvrtc` system libraries, and registers a `run` step that executes
/// the built binary.
pub fn build(b: *std.Build) !void {
    // exe points to main.zig that uses cudaz
    const exe = b.addExecutable(.{
        .name = "main",
        // `b.path` yields a LazyPath relative to the build root. The older
        // `.{ .path = ... }` union literal is not accepted by Zig 0.13.0.
        .root_source_file = b.path("src/main.zig"),
        .target = b.host,
    });

    // Point to cudaz dependency
    const cudaz_dep = b.dependency("cudaz", .{});

    // Fetch and add the module from cudaz dependency
    const cudaz_module = cudaz_dep.module("cudaz");
    exe.root_module.addImport("cudaz", cudaz_module);

    // Dynamically link to libc, cuda, nvrtc
    exe.linkLibC();
    exe.linkSystemLibrary("cuda");
    exe.linkSystemLibrary("nvrtc");

    // Run binary: `zig build run` builds the executable and then runs it.
    const run = b.step("run", "Run the binary");
    const run_step = b.addRunArtifact(exe);
    run.dependOn(&run_step.step);
}
To run the code below, refer to the example project: increment.
// src/main.zig
const std = @import("std");
const Cuda = @import("cudaz");
const CuDevice = Cuda.Device;
const CuCompile = Cuda.Compile;
const CuLaunchConfig = Cuda.LaunchConfig;
// CUDA kernel source, kept as a Zig multiline string literal so it can be
// handed to `CuCompile.cudaText` in `main`.
// The `increment` kernel adds 1 to the element of `out` at the calling
// thread's global x index (blockIdx.x * blockDim.x + threadIdx.x).
// The kernel performs no bounds check, so the launch configuration must not
// start more threads than `out` has elements.
const increment_kernel =
\\extern "C" __global__ void increment(float *out)
\\{
\\ int i = blockIdx.x * blockDim.x + threadIdx.x;
\\ out[i] = out[i] + 1;
\\}
;
/// Example entry point: uploads three floats to the GPU, compiles and loads
/// the `increment` kernel, launches it over the uploaded data, and reclaims
/// the result on the host. Any failing step is propagated to the caller.
pub fn main() !void {
    // Host-side allocator; it is torn down when `main` returns.
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Acquire the default GPU and release it on exit.
    const gpu = try CuDevice.default();
    defer gpu.free();

    // Upload the host array to the GPU.
    const host_values = [_]f32{ 1.2, 2.8, 0.123 };
    const device_values = try gpu.htodCopy(f32, &host_values);
    defer device_values.free();

    // Compile the kernel source to PTX, load it, and look up the entry point.
    const ptx_text = try CuCompile.cudaText(increment_kernel, .{}, allocator);
    defer allocator.free(ptx_text);
    const ptx_module = try CuDevice.loadPtxText(ptx_text);
    const increment_fn = try ptx_module.getFunc("increment");

    // One block of three threads: one thread per uploaded element.
    const launch_config = CuLaunchConfig{
        .block_dim = .{ 3, 1, 1 },
        .grid_dim = .{ 1, 1, 1 },
        .shared_mem_bytes = 0,
    };
    try increment_fn.run(.{&device_values.device_ptr}, launch_config);

    // Bring the incremented values back to the host.
    const incremented = try CuDevice.syncReclaim(f32, allocator, device_values);
    defer incremented.deinit();
}
Examples:
- The build script automatically detects and links the installed CUDA libraries. If needed, provide the CUDA installation path manually with a build parameter:
zig build -DCUDA_PATH=<cuda_folder>
Inspired by Rust Cuda Library.