Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unaligned deref fix: Remeasure CI performance #3999

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ rec {
name = "motoko-rts-deps";
src = subpath ./rts;
sourceRoot = "rts/motoko-rts-tests";
sha256 = "sha256-jCk92mPwXd8H8zEH4OMdcEwFM8IiYdlhYdYr+WzDW5E=";
sha256 = "sha256-bmJF19LIvTMZnj78XF30lxqRlvQaZ0YlqCO2wnwmiNg=";
copyLockfile = true;
};

Expand Down
14 changes: 13 additions & 1 deletion rts/motoko-rts-tests/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ mod stream;
mod text;
mod utf8;

use motoko_rts::types::Bytes;
use motoko_rts::types::{read64, write64, Bytes};

fn main() {
if std::mem::size_of::<usize>() != 4 {
Expand All @@ -21,6 +21,7 @@ fn main() {
}

unsafe {
test_read_write_64_bit();
bigint::test();
bitrel::test();
continuation_table::test();
Expand All @@ -34,6 +35,17 @@ fn main() {
}
}

fn test_read_write_64_bit() {
println!("Testing 64-bit read-write");
const TEST_VALUE: u64 = 0x1234_5678_9abc_def0;
let mut lower = 0u32;
let mut upper = 0u32;
write64(&mut lower, &mut upper, TEST_VALUE);
assert_eq!(lower, 0x9abc_def0);
assert_eq!(upper, 0x1234_5678);
assert_eq!(read64(lower, upper), TEST_VALUE);
}

// Called by the RTS to panic
#[no_mangle]
extern "C" fn rts_trap(ptr: *const u8, len: Bytes<u32>) -> ! {
Expand Down
4 changes: 3 additions & 1 deletion rts/motoko-rts-tests/src/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::memory::TestMemory;

use motoko_rts::stream::alloc_stream;
use motoko_rts::types::{size_of, Blob, Bytes, Stream, Value, Words};
use motoko_rts_macros::is_incremental_gc;

pub unsafe fn test() {
println!("Testing streaming ...");
Expand Down Expand Up @@ -33,7 +34,8 @@ pub unsafe fn test() {
println!(" Testing stream decay");
let blob = stream.as_stream().split();
assert_eq!(blob.as_blob().len(), Bytes(STREAM_SMALL_SIZE));
assert_eq!(stream.as_blob().len(), Bytes(24));
const REMAINDER: u32 = if is_incremental_gc!() { 20 } else { 24 };
assert_eq!(stream.as_blob().len(), Bytes(REMAINDER));

println!(" Testing stream filling (blocks)");
const STREAM_LARGE_SIZE: u32 = 6000;
Expand Down
6 changes: 5 additions & 1 deletion rts/motoko-rts/src/gc/incremental/mark_bitmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ impl MarkBitmap {
}

/// Assign and initialize the bitmap memory at the defined address.
/// The `bitmap_address` must be 64-bit-aligned for fast iteration.
pub unsafe fn assign(&mut self, bitmap_address: *mut u8) {
debug_assert_eq!(bitmap_address as usize % size_of::<u64>(), 0);
memzero(
bitmap_address as usize,
Bytes(BITMAP_SIZE as u32).to_words(),
Expand Down Expand Up @@ -96,7 +98,7 @@ impl MarkBitmap {
/// The iterator separates advancing `next()` from inspection `current_marked_offset()`
/// to better support the incremental evacuation and update GC increments.
pub struct BitmapIterator {
/// Start address of the mark bitmap.
/// Start address of the mark bitmap. Must be 64-bit-aligned.
bitmap_pointer: *mut u8,
/// Index of next bit to continue iteration in the bitmap.
/// Invariant during (initialized and unfinished):
Expand All @@ -123,6 +125,7 @@ const BIT_INDEX_END: usize = BITMAP_SIZE * u8::BITS as usize;
const _: () = assert!(BIT_INDEX_END < BITMAP_ITERATION_END);

impl BitmapIterator {
/// The `bitmap_pointer` must be 64-bit-aligned.
fn new(bitmap_pointer: *mut u8) -> BitmapIterator {
debug_assert_ne!(bitmap_pointer, null_mut());
debug_assert_eq!(PARTITION_SIZE % size_of::<u64>(), 0);
Expand Down Expand Up @@ -171,6 +174,7 @@ impl BitmapIterator {
if self.next_bit_index < BIT_INDEX_END {
debug_assert_eq!(self.next_bit_index % u8::BITS as usize, 0);
let word64_index = self.next_bit_index / u8::BITS as usize;
// The bitmap pointer is guaranteed to be always 64-bit aligned, see `BitmapIterator::new()`.
self.current_word =
unsafe { *(self.bitmap_pointer.add(word64_index) as *const u64) };
self.leading_zeros = self.current_word.leading_zeros() as usize;
Expand Down
5 changes: 5 additions & 0 deletions rts/motoko-rts/src/gc/incremental/partitioned_heap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ impl PartitionedHeap {
rts_trap_with("Cannot grow memory");
}

/// The returned bitmap address is guaranteed to be 64-bit-aligned.
unsafe fn allocate_bitmap<M: Memory>(&mut self, mem: &mut M) -> *mut u8 {
if self.bitmap_allocation_pointer % PARTITION_SIZE == 0 {
let partition = self.allocate_temporary_partition();
Expand All @@ -437,6 +438,10 @@ impl PartitionedHeap {
}
let bitmap_address = self.bitmap_allocation_pointer as *mut u8;
self.bitmap_allocation_pointer += BITMAP_SIZE;
debug_assert_eq!(
bitmap_address as usize % size_of::<u64>().to_bytes().as_usize(),
0
);
bitmap_address
}

Expand Down
7 changes: 5 additions & 2 deletions rts/motoko-rts/src/gc/mark_compact/bitmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ pub unsafe fn iter_bits() -> BitmapIter {
let current_word = if blob_len_64bit_words == 0 {
0
} else {
*(BITMAP_PTR as *const u64)
let bitmap_ptr64 = BITMAP_PTR as *const u64;
bitmap_ptr64.read_unaligned()
};

debug_assert!(BITMAP_PTR as usize >= BITMAP_FORBIDDEN_PTR as usize);
Expand Down Expand Up @@ -190,7 +191,9 @@ impl BitmapIter {
return BITMAP_ITER_END;
}
self.current_word = unsafe {
*(BITMAP_FORBIDDEN_PTR.add(self.current_bit_idx as usize / 8) as *const u64)
let ptr64 =
BITMAP_FORBIDDEN_PTR.add(self.current_bit_idx as usize / 8) as *const u64;
ptr64.read_unaligned()
};
self.leading_zeros = self.current_word.leading_zeros();
}
Expand Down
28 changes: 14 additions & 14 deletions rts/motoko-rts/src/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
// into the leading bytes:
// - `obj header` contains tag (BLOB) and forwarding pointer
// - `len` is in blob metadata
// - 'padding' because the compiler automatically align `ptr64` to 64-bit according to
// C-memory representation
// - `ptr64`, `start64`, and `limit64` are each represented as two 32-bit components
// in little endian encoding.
// - `ptr64` and `limit64` are the next and past-end pointers into stable memory
// - `filled` and `cache` are the number of bytes consumed from the blob, and the
// staging area of the stream, respectively
Expand All @@ -40,10 +40,10 @@ use crate::rts_trap_with;
use crate::tommath_bindings::{mp_div_2d, mp_int};
use crate::types::{size_of, Blob, Bytes, Stream, Value, TAG_BLOB};

use motoko_rts_macros::{ic_mem_fn, is_incremental_gc};
use motoko_rts_macros::ic_mem_fn;

const MAX_STREAM_SIZE: Bytes<u32> = Bytes((1 << 30) - 1);
const INITIAL_STREAM_FILLED: Bytes<u32> = Bytes(if is_incremental_gc!() { 36 } else { 32 });
const INITIAL_STREAM_FILLED: Bytes<u32> = Bytes(32);
const STREAM_CHUNK_SIZE: Bytes<u32> = Bytes(128);

#[ic_mem_fn]
Expand All @@ -57,9 +57,9 @@ pub unsafe fn alloc_stream<M: Memory>(mem: &mut M, size: Bytes<u32>) -> *mut Str
}
let ptr = alloc_blob(mem, size + INITIAL_STREAM_FILLED);
let stream = ptr.as_stream();
(*stream).ptr64 = 0;
(*stream).start64 = 0;
(*stream).limit64 = 0;
stream.write_ptr64(0);
stream.write_start64(0);
stream.write_limit64(0);
(*stream).outputter = Stream::no_backing_store;
(*stream).filled = INITIAL_STREAM_FILLED;
allocation_barrier(ptr);
Expand Down Expand Up @@ -99,9 +99,9 @@ impl Stream {
#[cfg(feature = "ic")]
fn send_to_stable(self: *mut Self, ptr: *const u8, n: Bytes<u32>) {
unsafe {
let next_ptr64 = (*self).ptr64 + n.as_u32() as u64;
stable64_write_moc((*self).ptr64, ptr as u64, n.as_u32() as u64);
(*self).ptr64 = next_ptr64
let next_ptr64 = self.read_ptr64() + n.as_u32() as u64;
stable64_write_moc(self.read_ptr64(), ptr as u64, n.as_u32() as u64);
self.write_ptr64(next_ptr64);
}
}

Expand All @@ -111,9 +111,9 @@ impl Stream {
#[export_name = "stream_stable_dest"]
pub fn setup_stable_dest(self: *mut Self, start: u64, limit: u64) {
unsafe {
(*self).ptr64 = start;
(*self).start64 = start;
(*self).limit64 = limit;
self.write_ptr64(start);
self.write_start64(start);
self.write_limit64(limit);
(*self).outputter = Self::send_to_stable;
}
}
Expand All @@ -122,7 +122,7 @@ impl Stream {
#[export_name = "stream_write"]
pub fn cache_bytes(self: *mut Self, ptr: *const u8, n: Bytes<u32>) {
unsafe {
if (*self).limit64 != 0 && n > STREAM_CHUNK_SIZE
if self.read_limit64() != 0 && n > STREAM_CHUNK_SIZE
|| (*self).filled + n > (*self).header.len
{
self.flush();
Expand Down
57 changes: 51 additions & 6 deletions rts/motoko-rts/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -658,17 +658,29 @@ impl Blob {
}
}

/// Note: Do not declare 64-bit fields, as otherwise, the objects are expected to be 64-bit aligned.
/// This is not the case in the current heap design.
/// Moreover, fields would also get 64-bit aligned causing implicit paddding.

#[repr(C)] // See the note at the beginning of this module
pub struct Stream {
pub header: Blob,

// Cannot use `#[incremental_gc]` as Rust only allows non-macro attributes for fields.
#[cfg(feature = "incremental_gc")]
pub padding: u32, // The insertion of the forwarding pointer in the header implies 1 word padding to 64-bit.
/// Components of the 64-bit `ptr` value. Little-endian encoding.
/// Use `read_ptr64()` and `write_ptr64()` to access.
pub ptr_lower: u32,
pub ptr_upper: u32,

/// Components of the 64-bit `start` value. Little-endian encoding.
/// Use `read_start64()` and `write_start64()` to access.
pub start_lower: u32,
pub start_upper: u32,

/// Components of the 64-bit `limit` value. Little-endian encoding.
/// Use `read_limit64()` and `write_limit64()` to access.
pub limit_lower: u32,
pub limit_upper: u32,

pub ptr64: u64,
pub start64: u64,
pub limit64: u64,
pub outputter: fn(*mut Self, *const u8, Bytes<u32>) -> (),
pub filled: Bytes<u32>, // cache data follows ..
}
Expand All @@ -682,6 +694,39 @@ impl Stream {
debug_assert!(!self.is_forwarded());
self as *mut Blob
}

pub unsafe fn write_ptr64(self: *mut Self, value: u64) {
write64(&mut (*self).ptr_lower, &mut (*self).ptr_upper, value);
}

pub unsafe fn read_ptr64(self: *const Self) -> u64 {
read64((*self).ptr_lower, (*self).ptr_upper)
}

pub unsafe fn write_start64(self: *mut Self, value: u64) {
write64(&mut (*self).start_lower, &mut (*self).start_upper, value);
}

pub unsafe fn read_start64(self: *const Self) -> u64 {
read64((*self).start_lower, (*self).start_upper)
}

pub unsafe fn write_limit64(self: *mut Self, value: u64) {
write64(&mut (*self).limit_lower, &mut (*self).limit_upper, value);
}

pub unsafe fn read_limit64(self: *const Self) -> u64 {
read64((*self).limit_lower, (*self).limit_upper)
}
}

pub fn read64(lower: u32, upper: u32) -> u64 {
((upper as u64) << u32::BITS) | lower as u64
}

pub fn write64(lower: &mut u32, upper: &mut u32, value: u64) {
*upper = (value >> u32::BITS) as u32;
*lower = (value & u32::MAX as u64) as u32;
}

/// Only used by the copying GC - not to be confused with the forwarding pointer in the general object header
Expand Down
7 changes: 4 additions & 3 deletions src/codegen/compile.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1137,7 +1137,8 @@ module Heap = struct

(* Although we occasionally want to treat two consecutive
32 bit fields as one 64 bit number *)


(* Requires little-endian encoding, see also `Stream` in `types.rs` *)
let load_field64_unskewed (i : int32) : G.t =
let offset = Int32.mul word_size i in
G.i (Load {ty = I64Type; align = 2; offset; sz = None})
Expand Down Expand Up @@ -7017,7 +7018,7 @@ module BlobStream : Stream = struct
let name_for fn_name ts = "@Bl_" ^ fn_name ^ "<" ^ Typ_hash.typ_seq_hash ts ^ ">"

let absolute_offset env get_token =
let offset = if !Flags.gc_strategy = Flags.Incremental then 9l else 8l in (* see invariant in `stream.rs` *)
let offset = 8l in (* see invariant in `stream.rs` *)
let filled_field = Int32.add (Blob.len_field env) offset in
get_token ^^ Tagged.load_field_unskewed env filled_field

Expand Down Expand Up @@ -7115,7 +7116,7 @@ module Stabilization = struct
E.call_import env "rts" "stream_stable_dest"

let ptr64_field env =
let offset = if !Flags.gc_strategy = Flags.Incremental then 2l else 1l in (* see invariant in `stream.rs` *)
let offset = 1l in (* see invariant in `stream.rs` *)
Int32.add (Blob.len_field env) offset (* see invariant in `stream.rs`, padding for 64-bit after Stream header *)

let terminate env get_token get_data_size header_size =
Expand Down
2 changes: 1 addition & 1 deletion wasm-profiler/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ version = "0.1.0"
edition = "2018"

[dependencies]
parity-wasm = "0.42.2"
parity-wasm = { version = "0.42.2", features = ["std", "sign_ext"] }
structopt = "0.3"
clap = "2.33"

Expand Down