Skip to content
This repository has been archived by the owner on Dec 9, 2018. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
Jorge Aparicio committed Jun 10, 2016
1 parent 1f833cb commit 8e6b66f
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
*.png
*.ptx
*.rs.bk
Cargo.lock
target
14 changes: 13 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@

Experiments with CUDA and Rust

Testing the [new PTX targets][Implementation].

```
$ rustc --target nvptx64-unknown-unknown --emit=asm -O kernel.rs
$ mv kernel.s tests/kernel.ptx
# change `.func memcpy_` to `.entry memcpy_` so that `memcpy_` is a kernel entry point that can be launched from the host
$ edit tests/kernel.ptx
$ cargo test memcpy
```

## Examples

- [Query number of devices](/examples/query.rs).
Expand All @@ -11,7 +21,7 @@ Experiments with CUDA and Rust

## Areas to explore

- Generating PTX from Rust code at compile time. ([prior art]).
- ~~Generating PTX from Rust code at compile time. ([prior art]).~~ WIP. [Implementation]. [RFC].
- Type safety for launching kernels. Arity and argument types should be validated at compile time.
- Linear algebra library with transparent CUDA acceleration. A matrix type that stores its data
in the GPU, with operator sugar that maps to CuBLAS/custom kernels.
Expand All @@ -22,6 +32,8 @@ Experiments with CUDA and Rust

[linalg]: https://github.com/japaric/linalg.rs
[prior art]: http://blog.theincredibleholk.org/blog/2012/12/05/compiling-rust-for-gpus/
[Implementation]: https://github.com/rust-lang/rust/pull/34195
[RFC]: https://github.com/rust-lang/rfcs/pull/1641

## License

Expand Down
93 changes: 93 additions & 0 deletions kernel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#![allow(warnings)]
#![feature(intrinsics)]
#![feature(lang_items)]
#![feature(no_core)]
#![no_core]

use Option::*;
use Ordering::*;

/// Element-wise copy kernel: each thread copies at most one `f32` from `src`
/// to `dst`.
///
/// The element index is computed as
/// `block_idx_x() * block_dim_x() + thread_idx_x()` (using the non-trapping
/// `overflowing_*` intrinsics) and widened to `isize`. Threads whose index is
/// not below `n` do nothing.
///
/// The function is not marked `unsafe`, but it dereferences raw pointers:
/// callers must make sure `src` is readable and `dst` is writable for at
/// least `n` elements.
#[no_mangle]
pub fn memcpy_(src: *const f32, dst: *mut f32, n: isize) {
    unsafe {
        // First element index handled by this block.
        let block_start = overflowing_mul(block_idx_x(), block_dim_x());
        // Index of the element handled by this particular thread.
        let idx = overflowing_add(block_start, thread_idx_x()) as isize;

        if idx < n {
            let from = offset(src, idx);
            // `offset` hands back a `*const` pointer, so cast it back to
            // `*mut` before writing through it.
            let to = offset(dst, idx) as *mut f32;
            *to = *from;
        }
    }
}

// Compiler intrinsics used by the kernel. Because this crate is `#![no_core]`
// they have to be declared by hand instead of being imported from `core`.
extern "rust-intrinsic" {
// Thread-geometry queries, each returning an `i32`.
// NOTE(review): presumably these map to CUDA's `blockIdx.x`, `blockDim.x`
// and `threadIdx.x` and are only available with the experimental PTX
// target — confirm against the rustc implementation PR.
fn block_idx_x() -> i32;
fn block_dim_x() -> i32;
fn thread_idx_x() -> i32;

// Pointer arithmetic: takes a `*const T` and an `isize` and returns a
// `*const T`. NOTE(review): presumably advances by `offset` elements of `T`
// (not bytes), like the `core` intrinsic of the same name — confirm.
fn offset<T>(dst: *const T, offset: isize) -> *const T;
// Integer add / multiply.
// NOTE(review): presumably these wrap on overflow instead of trapping —
// confirm against the intrinsic definitions of the targeted compiler version.
fn overflowing_add<T>(a: T, b: T) -> T;
fn overflowing_mul<T>(a: T, b: T) -> T;
}

// `Copy` lang item. A `#![no_core]` crate gets nothing from `core`, so the
// marker trait the compiler looks up under the name "copy" is declared here.
// It has no methods and this file adds no impls for it.
#[lang = "copy"]
trait Copy {}

// `Sized` lang item: the marker trait the compiler looks up under the name
// "sized". It is declared locally because this crate is `#![no_core]`; the
// `?Sized` bound on `PartialOrd` below refers to it.
#[lang = "sized"]
trait Sized {}
// NOTE: `core`'s `PartialOrd` has a `PartialEq<Rhs>` supertrait; it is left
// out here (this file declares no equality trait).
/// Local `PartialOrd` ("ord" lang item) for this `#![no_core]` crate.
///
/// Implementors provide `partial_cmp`; the four comparison methods have
/// default bodies derived from it and may be overridden.
#[lang = "ord"]
trait PartialOrd<Rhs: ?Sized = Self> {
    /// Returns how `self` orders relative to `other`, or `None` when the two
    /// values are not comparable.
    fn partial_cmp(&self, other: &Rhs) -> Option<Ordering>;

    /// `true` only when `partial_cmp` reports `Less`.
    #[inline]
    fn lt(&self, other: &Rhs) -> bool {
        let ordering = self.partial_cmp(other);
        match ordering {
            Some(Equal) | Some(Greater) | None => false,
            Some(Less) => true,
        }
    }

    /// `true` when `partial_cmp` reports `Less` or `Equal`.
    #[inline]
    fn le(&self, other: &Rhs) -> bool {
        let ordering = self.partial_cmp(other);
        match ordering {
            Some(Greater) | None => false,
            Some(Less) | Some(Equal) => true,
        }
    }

    /// `true` only when `partial_cmp` reports `Greater`.
    #[inline]
    fn gt(&self, other: &Rhs) -> bool {
        let ordering = self.partial_cmp(other);
        match ordering {
            Some(Less) | Some(Equal) | None => false,
            Some(Greater) => true,
        }
    }

    /// `true` when `partial_cmp` reports `Greater` or `Equal`.
    #[inline]
    fn ge(&self, other: &Rhs) -> bool {
        let ordering = self.partial_cmp(other);
        match ordering {
            Some(Less) | None => false,
            Some(Greater) | Some(Equal) => true,
        }
    }
}

impl PartialOrd for isize {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
loop {}
}
fn lt(&self, other: &Self) -> bool { (*self) < (*other) }
fn le(&self, other: &Self) -> bool { (*self) <= (*other) }
fn ge(&self, other: &Self) -> bool { (*self) >= (*other) }
fn gt(&self, other: &Self) -> bool { (*self) > (*other) }
}

// Minimal local `Option`, declared here because this `#![no_core]` crate
// cannot use the one from `core`. Used as the return type of
// `PartialOrd::partial_cmp`; its variants are imported at the top of the file
// with `use Option::*`.
enum Option<T> {
// No value.
None,
// A value of type `T`.
Some(T),
}

// Result of a comparison, declared locally because this crate is
// `#![no_core]`. The variants carry explicit discriminants -1, 0 and 1 and
// are imported at the top of the file with `use Ordering::*`.
enum Ordering {
// Left-hand side is smaller.
Less = -1,
// Both sides are equal.
Equal = 0,
// Left-hand side is larger.
Greater = 1,
}
7 changes: 4 additions & 3 deletions tests/memcpy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@ extern crate uxx;
use std::ffi::CStr;
use std::mem;

use cuda::compile;
use cuda::driver::{self, Any, Block, Device, Direction, Grid, Result};
use rand::{Rng, XorShiftRng};
use uxx::u31;

const KERNEL: &'static str = include_str!("memcpy.cu");
const KERNEL: &'static str = include_str!("kernel.ptx");

#[test]
fn memcpy() {
Expand All @@ -21,7 +20,9 @@ fn run() -> Result<()> {
const SIZE: usize = 1024 * 1024;

// Compile KERNEL
let ref ptx = compile::source(KERNEL).unwrap();
let kernel = &mut KERNEL.to_owned().into_bytes();
kernel.push(0);
let ptx = CStr::from_bytes_with_nul(kernel).unwrap();

// Allocate memory on host
let ref mut rng: XorShiftRng = rand::thread_rng().gen();
Expand Down

0 comments on commit 8e6b66f

Please sign in to comment.