Skip to content

Commit

Permalink
feat(fuzzing): Improve Wasm compilation fuzzer (#3380)
Browse files Browse the repository at this point in the history
This PR tries to improve the coverage of arbitrary Wasms generated by
the wasm fuzzers.

1. Compilation fuzzer:
   - Added checks for non-determinism in compilation errors.
   - The arbitrary Wasm module provided to the fuzzer now has the following
     probability distribution:
   ```
    // 33% - Random bytes
    // 33% - Wasm with arbitrary wasm-smith config + maybe invalid functions
    // 33% - IC compliant wasm + maybe invalid functions
   ```
2. `ICWasmModule` now generates both wasm32 and wasm64 modules.
   `EmbeddersConfig` and System API imports are handled accordingly.
3. Unify `EmbeddersConfig` in `ic_wasm.rs`.
  • Loading branch information
venkkatesh-sekar authored Jan 13, 2025
1 parent c16efb0 commit ad9ac37
Show file tree
Hide file tree
Showing 10 changed files with 270 additions and 152 deletions.
3 changes: 2 additions & 1 deletion rs/embedders/fuzz/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ rust_library(
"//rs/types/types",
"//rs/types/wasm_types",
"@crate_index//:arbitrary",
"@crate_index//:futures",
"@crate_index//:lazy_static",
"@crate_index//:libfuzzer-sys",
"@crate_index//:tokio",
Expand Down Expand Up @@ -121,7 +122,7 @@ rust_fuzz_test_binary_afl(
deps = [":wasm_fuzzers"] + ["@crate_index//:libfuzzer-sys"],
)

rust_fuzz_test_binary(
rust_fuzz_test_binary_afl(
name = "compile_wasm_using_embedder",
srcs = ["fuzz_targets/compile_wasm_using_embedder.rs"],
proc_macro_deps = MACRO_DEPENDENCIES,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use wasm_fuzzers::compile::run_fuzzer;
use wasm_fuzzers::compile::MaybeInvalidModule;

fuzz_target!(|module: MaybeInvalidModule| {
fuzz_target!(|module: &[u8]| {
run_fuzzer(module);
});
128 changes: 109 additions & 19 deletions rs/embedders/fuzz/src/compile.rs
Original file line number Diff line number Diff line change
@@ -1,44 +1,134 @@
use crate::ic_wasm::{generate_exports, ic_embedders_config, ic_wasm_config};
use arbitrary::{Arbitrary, Result, Unstructured};
use ic_config::embedders::Config as EmbeddersConfig;
use ic_config::flag_status::FlagStatus;
use ic_embedders::{wasm_utils::compile, WasmtimeEmbedder};
use ic_logger::replica_logger::no_op_logger;
use ic_wasm_types::BinaryEncodedWasm;
use wasm_smith::{Config, Module};
use std::time::Duration;
use tokio::runtime::Runtime;
use wasm_smith::{Config, MemoryOffsetChoices, Module};

#[derive(Debug)]
pub struct MaybeInvalidModule(pub Module);
pub struct MaybeInvalidModule {
pub module: Module,
pub memory64_enabled: bool,
}

const MAX_PARALLEL_EXECUTIONS: usize = 4;

impl<'a> Arbitrary<'a> for MaybeInvalidModule {
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
let mut config = Config::arbitrary(u)?;
let mut config = if u.ratio(1, 2)? {
let memory64_enabled = u.ratio(2, 3)?;
let mut config = ic_wasm_config(ic_embedders_config(memory64_enabled));
config.exports = generate_exports(ic_embedders_config(memory64_enabled), u)?;
config.min_data_segments = 2;
config.max_data_segments = 10;
config
} else {
Config::arbitrary(u)?
};
config.allow_invalid_funcs = true;
Ok(MaybeInvalidModule(Module::new(config, u)?))
config.memory_offset_choices = MemoryOffsetChoices(40, 20, 40);
Ok(MaybeInvalidModule {
module: Module::new(config.clone(), u)?,
memory64_enabled: config.memory64_enabled,
})
}
}

#[inline(always)]
pub fn run_fuzzer(module: MaybeInvalidModule) {
let mut config = EmbeddersConfig::default();
config.feature_flags.wasm64 = FlagStatus::Enabled;
let wasm = module.0.to_bytes();
let binary_wasm = BinaryEncodedWasm::new(wasm);
let embedder = WasmtimeEmbedder::new(config, no_op_logger());

let (_, _) = compile(&embedder, &binary_wasm);
pub fn run_fuzzer(bytes: &[u8]) {
let config;
let mut u = Unstructured::new(bytes);

// Arbitrary Wasm module generation probabilities
// 33% - Random bytes
// 33% - Wasm with arbitrary wasm-smith config + maybe invalid functions
// 33% - IC compliant wasm + maybe invalid functions

// Only used w/ random bytes
let memory64_enabled = u.ratio(1, 2).unwrap_or(false);

let wasm = if u.ratio(1, 3).unwrap_or(false)
|| bytes.len() < <MaybeInvalidModule as Arbitrary>::size_hint(0).0
{
config = ic_embedders_config(memory64_enabled);
raw_wasm_bytes(bytes)
} else {
let data = <MaybeInvalidModule as Arbitrary>::arbitrary_take_rest(u);

match data {
Ok(data) => {
config = ic_embedders_config(data.memory64_enabled);
data.module.to_bytes()
}
Err(_) => {
config = ic_embedders_config(memory64_enabled);
raw_wasm_bytes(bytes)
}
}
};

let rt: Runtime = tokio::runtime::Builder::new_multi_thread()
.worker_threads(6)
.max_blocking_threads(2)
.enable_all()
.build()
.unwrap_or_else(|err| panic!("Could not create tokio runtime: {}", err));

let futs = (0..MAX_PARALLEL_EXECUTIONS)
.map(|_| {
rt.spawn({
let wasm = wasm.clone();
let binary_wasm = BinaryEncodedWasm::new(wasm);
let embedder = WasmtimeEmbedder::new(config.clone(), no_op_logger());

async move { compile(&embedder, &binary_wasm) }
})
})
.collect::<Vec<_>>();

rt.block_on(async move {
// The omitted field is EmbedderCache(Result<InstancePre<StoreData>, HypervisorError>)
// 1. InstancePre<StoreData> doesn't implement PartialEq
// 2. HypervisorError is the same in compilation_result which is checked for equality

let result = futures::future::join_all(futs)
.await
.into_iter()
.map(|r| r.expect("Failed to join tasks"))
.map(|(_, compilation_result)| {
if let Ok(mut r) = compilation_result {
r.0.compilation_time = Duration::from_millis(1);
Ok(r)
} else {
compilation_result
}
})
.collect::<Vec<_>>();

let first = result.first();

if let Some(first) = first {
assert!(result.iter().all(|r| r == first));
}
});
}

/// Returns a new buffer holding the four bytes `\0asm` followed by `data`.
///
/// The input is copied verbatim after the prefix; nothing else is added or
/// checked, so the result is not guaranteed to be a valid Wasm module.
#[inline(always)]
fn raw_wasm_bytes(data: &[u8]) -> Vec<u8> {
    // Prefix that every generated buffer starts with.
    const PREFIX: &[u8] = b"\x00asm";
    [PREFIX, data].concat()
}

#[cfg(test)]
mod tests {
use super::*;
use arbitrary::{Arbitrary, Unstructured};

#[test]
fn test_compile_wasm_using_embedder_single_run() {
let arbitrary_str: &str = "this is a test string";
let unstrucutred = Unstructured::new(arbitrary_str.as_bytes());
let module = <MaybeInvalidModule as Arbitrary>::arbitrary_take_rest(unstrucutred)
.expect("Unable to extract wasm from Unstructured data");
run_fuzzer(module);
run_fuzzer(arbitrary_str.as_bytes());
}
}
153 changes: 85 additions & 68 deletions rs/embedders/fuzz/src/differential.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
use crate::ic_wasm::ICWasmModule;
use ic_config::embedders::Config as EmbeddersConfig;
use ic_config::flag_status::FlagStatus;
use crate::ic_wasm::{ic_embedders_config, ICWasmModule};
use ic_embedders::InstanceRunResult;
use ic_interfaces::execution_environment::HypervisorResult;
use ic_interfaces::execution_environment::SystemApi;
Expand All @@ -11,10 +9,18 @@ use ic_types::methods::{FuncRef, WasmMethod};
use std::collections::BTreeSet;
use tokio::runtime::Runtime;

const MAX_PARALLEL_EXECUTIONS: usize = 2;
/// Output of a single `execute_wasm` run; `equal` compares two of these
/// element by element and panics on any mismatch.
///
/// Each entry is a tuple whose fields are, in order:
/// 0. the execution result, which `equal` requires to be identical;
/// 1. the instance run result, for which `equal` compares dirty pages and
///    exported globals on success, or the error on failure;
/// 2. a `u64` that `equal` treats as the number of instructions used.
// NOTE(review): presumably one entry per executed Wasm method — confirm
// against the body of `execute_wasm`.
type DeterministicExecutionResult = Vec<(
HypervisorResult<Option<WasmResult>>,
HypervisorResult<InstanceRunResult>,
u64,
)>;

#[inline(always)]
pub fn run_fuzzer(module: ICWasmModule) {
let wasm = module.module.to_bytes();
let wasm_methods: BTreeSet<WasmMethod> = module.exported_functions;
let memory64_enabled = module.config.memory64_enabled;

if wasm_methods.is_empty() {
return;
Expand All @@ -29,88 +35,99 @@ pub fn run_fuzzer(module: ICWasmModule) {
.build()
.unwrap_or_else(|err| panic!("Could not create tokio runtime: {}", err));

let first_execution = rt.spawn({
let wasm = wasm.clone();
let wasm_methods = wasm_methods.clone();

async move { execute_wasm(wasm, wasm_methods) }
});
let futs = (0..MAX_PARALLEL_EXECUTIONS)
.map(|_| {
rt.spawn({
let wasm = wasm.clone();
let wasm_methods = wasm_methods.clone();

let second_execution = rt.spawn(async move { execute_wasm(wasm, wasm_methods) });
async move { execute_wasm(wasm, wasm_methods, memory64_enabled) }
})
})
.collect::<Vec<_>>();

rt.block_on(async move {
let first = first_execution.await.unwrap();
let second = second_execution.await.unwrap();

// same size
assert_eq!(first.len(), second.len());

for (x, y) in std::iter::zip(first, second) {
// execution result must be same
assert_eq!(x.0, y.0);

// instructions used must be same
assert_eq!(x.2, y.2);

match (x.1, y.1) {
(Ok(run_x), Ok(run_y)) => {
assert_eq!(run_x.wasm_dirty_pages, run_y.wasm_dirty_pages);
assert_eq!(
run_x.stable_memory_dirty_pages,
run_y.stable_memory_dirty_pages
);

// special treatment because of NaN
let globals_x = run_x.exported_globals;
let globals_y = run_y.exported_globals;
for (g_x, g_y) in std::iter::zip(globals_x, globals_y) {
match (g_x, g_y) {
(Global::F32(f_x), Global::F32(f_y)) => {
if !f_x.is_nan() && !f_y.is_nan() {
assert_eq!(f_x, f_y);
} else {
// should hold because of canonicalization
assert_eq!(f_x.to_bits(), f_y.to_bits());
}
}
(Global::F64(f_x), Global::F64(f_y)) => {
if !f_x.is_nan() && !f_y.is_nan() {
assert_eq!(f_x, f_y);
} else {
// should hold because of canonicalization
assert_eq!(f_x.to_bits(), f_y.to_bits());
}
let result = futures::future::join_all(futs)
.await
.into_iter()
.map(|r| r.expect("Failed to join tasks"))
.collect::<Vec<_>>();

let first = result.first();

if let Some(first) = first {
result
.iter()
.for_each(|r| equal(first.to_vec(), r.to_vec()));
}
});
}

// Panics if the results are not equal
fn equal(first: DeterministicExecutionResult, second: DeterministicExecutionResult) {
// same size
assert_eq!(first.len(), second.len());

for (x, y) in std::iter::zip(first, second) {
// execution result must be same
assert_eq!(x.0, y.0);

// instructions used must be same
assert_eq!(x.2, y.2);

match (x.1, y.1) {
(Ok(run_x), Ok(run_y)) => {
assert_eq!(run_x.wasm_dirty_pages, run_y.wasm_dirty_pages);
assert_eq!(
run_x.stable_memory_dirty_pages,
run_y.stable_memory_dirty_pages
);

// special treatment because of NaN
let globals_x = run_x.exported_globals;
let globals_y = run_y.exported_globals;
for (g_x, g_y) in std::iter::zip(globals_x, globals_y) {
match (g_x, g_y) {
(Global::F32(f_x), Global::F32(f_y)) => {
if !f_x.is_nan() && !f_y.is_nan() {
assert_eq!(f_x, f_y);
} else {
// should hold because of canonicalization
assert_eq!(f_x.to_bits(), f_y.to_bits());
}
(_, _) => {
assert_eq!(g_x, g_y);
}
(Global::F64(f_x), Global::F64(f_y)) => {
if !f_x.is_nan() && !f_y.is_nan() {
assert_eq!(f_x, f_y);
} else {
// should hold because of canonicalization
assert_eq!(f_x.to_bits(), f_y.to_bits());
}
}
(_, _) => {
assert_eq!(g_x, g_y);
}
}
}
(Err(e_x), Err(e_y)) => {
assert_eq!(e_x, e_y);
}
(_, _) => {
panic!("Instance results doesn't match");
}
}
(Err(e_x), Err(e_y)) => {
assert_eq!(e_x, e_y);
}
(_, _) => {
panic!("Instance results doesn't match");
}
}
});
}
}

#[inline(always)]
fn execute_wasm(
wasm: Vec<u8>,
wasm_methods: BTreeSet<WasmMethod>,
) -> Vec<(
HypervisorResult<Option<WasmResult>>,
HypervisorResult<InstanceRunResult>,
u64,
)> {
memory64_enabled: bool,
) -> DeterministicExecutionResult {
let mut result = vec![];
let mut config = EmbeddersConfig::default();
config.feature_flags.write_barrier = FlagStatus::Enabled;
config.feature_flags.wasm64 = FlagStatus::Enabled;
let config = ic_embedders_config(memory64_enabled);
let instance_result = WasmtimeInstanceBuilder::new()
.with_wasm(wasm)
.with_config(config)
Expand Down
Loading

0 comments on commit ad9ac37

Please sign in to comment.