From b920742e6bb35203b043263d4adb1a841d12e0b0 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 27 Feb 2024 12:46:40 -0600 Subject: [PATCH] Refactor generation of tables/elements in wasm-smith (#1426) * Refactor generation of tables/elements in wasm-smith This commit refactors `wasm-smith` and its generation of both table types and element segments. The goal is to help generate modules of shapes that Wasmtime does not currently support but should. Notably constant expressions are allowed to use `global.get`, even in element segments, and Wasmtime does not currently support this. Furthermore this commit additionally enables generating tables with initialization expressions which was not previously supported by `wasm-smith`. Internally this refactors a number of pieces of code that work with constant expressions to instead use a shared helper for generating constant expressions. This takes into account subtyping and such to try to generate interesting shapes of expressions when GC is enabled in particular. * Don't duplicate `arbitrary_ref_type` * Refactor core type generation in wasm-smith Don't pass around `type_ref_limit` as an explicit parameter but instead track it in the `Module` state. This enables reusing `self.arbitrary_ref_type` in `self.arbitrary_valtype` and preserves the property where self-referential types may be generated. * Fix fuzz-stats test * Fix wasm-smith tests --- .../src/bin/failed-instantiations.rs | 10 +- crates/wasm-encoder/src/core/code.rs | 14 + crates/wasm-smith/src/component.rs | 52 +- crates/wasm-smith/src/core.rs | 582 ++++++++++-------- crates/wasm-smith/src/core/code_builder.rs | 16 +- crates/wasm-smith/src/core/encode.rs | 40 +- crates/wasm-smith/src/core/terminate.rs | 7 +- crates/wasm-smith/tests/core.rs | 1 + 8 files changed, 418 insertions(+), 304 deletions(-) diff --git a/crates/fuzz-stats/src/bin/failed-instantiations.rs b/crates/fuzz-stats/src/bin/failed-instantiations.rs index d01b235a85..086353ffd4 100644 --- a/crates/fuzz-stats/src/bin/failed-instantiations.rs +++ b/crates/fuzz-stats/src/bin/failed-instantiations.rs @@ -113,7 +113,15 @@ impl State { // 1gb of memory. That's half the default allocation of memory for // libfuzzer-based fuzzers by default, and ideally we're not in a // situation where most of the modules are above this threshold. - let module = Module::new(&self.engine, &wasm).expect("failed to compile module"); + let module = match Module::new(&self.engine, &wasm) { + Ok(m) => m, + // NB: after bytecodealliance/wasm-tools#1426 wasm-smith is + // generating modules that Wasmtime can't handle until + // bytecodealliance/wasmtime#7996 is on crates.io, until that time + // ignore these errors. + Err(e) if format!("{e:?}").contains("unsupported init expr") => return Ok(()), + Err(e) => panic!("unexpected module compile error {e:?}"), + }; let mut store = Store::new( &self.engine, fuzz_stats::limits::StoreLimits { diff --git a/crates/wasm-encoder/src/core/code.rs b/crates/wasm-encoder/src/core/code.rs index 7d8e493422..76bfd7afde 100644 --- a/crates/wasm-encoder/src/core/code.rs +++ b/crates/wasm-encoder/src/core/code.rs @@ -3302,6 +3302,20 @@ impl ConstExpr { pub fn with_i64_mul(self) -> Self { self.with_insn(Instruction::I64Mul) } + + /// Returns the function, if any, referenced by this global. + pub fn get_ref_func(&self) -> Option { + let prefix = *self.bytes.get(0)?; + // 0xd2 == `ref.func` opcode, and if that's found then load the leb + // corresponding to the function index. + if prefix != 0xd2 { + return None; + } + leb128::read::unsigned(&mut &self.bytes[1..]) + .ok()? + .try_into() + .ok() + } } impl Encode for ConstExpr { diff --git a/crates/wasm-smith/src/component.rs b/crates/wasm-smith/src/component.rs index 77f7c04792..7a85b2bf10 100644 --- a/crates/wasm-smith/src/component.rs +++ b/crates/wasm-smith/src/component.rs @@ -11,7 +11,9 @@ use std::{ collections::{HashMap, HashSet}, rc::Rc, }; -use wasm_encoder::{ComponentTypeRef, ComponentValType, PrimitiveValType, TypeBounds, ValType}; +use wasm_encoder::{ + ComponentTypeRef, ComponentValType, HeapType, PrimitiveValType, RefType, TypeBounds, ValType, +}; mod encode; @@ -539,7 +541,7 @@ impl ComponentBuilder { } let ty = match u.int_in_range::(0..=1)? { - 0 => CoreType::Func(crate::core::arbitrary_func_type( + 0 => CoreType::Func(arbitrary_func_type( u, &self.config, &self.core_valtypes, @@ -818,7 +820,7 @@ impl ComponentBuilder { // Type definition. 2 => { - let ty = crate::core::arbitrary_func_type( + let ty = arbitrary_func_type( u, &self.config, &self.core_valtypes, @@ -954,7 +956,7 @@ impl ComponentBuilder { } fn arbitrary_core_table_type(&self, u: &mut Unstructured) -> Result { - crate::core::arbitrary_table_type(u, &self.config) + crate::core::arbitrary_table_type(u, &self.config, None) } fn arbitrary_core_memory_type(&self, u: &mut Unstructured) -> Result { @@ -2175,3 +2177,45 @@ struct CoreInstanceSection {} struct CoreTypeSection { types: Vec>, } + +fn arbitrary_func_type( + u: &mut Unstructured, + config: &Config, + valtypes: &[ValType], + max_results: Option, + type_ref_limit: u32, +) -> Result> { + let mut params = vec![]; + let mut results = vec![]; + arbitrary_loop(u, 0, 20, |u| { + params.push(arbitrary_valtype(u, config, valtypes, type_ref_limit)?); + Ok(true) + })?; + arbitrary_loop(u, 0, max_results.unwrap_or(20), |u| { + results.push(arbitrary_valtype(u, config, valtypes, type_ref_limit)?); + Ok(true) + })?; + Ok(Rc::new(crate::core::FuncType { params, results })) +} + +fn arbitrary_valtype( + u: &mut Unstructured, + config: &Config, + valtypes: &[ValType], + type_ref_limit: u32, +) -> Result { + if config.gc_enabled && type_ref_limit > 0 && u.ratio(1, 20)? { + Ok(ValType::Ref(RefType { + // TODO: For now, only create allow nullable reference + // types. Eventually we should support non-nullable reference types, + // but this means that we will also need to recognize when it is + // impossible to create an instance of the reference (eg `(ref + // nofunc)` has no instances, and self-referential types that + // contain a non-null self-reference are also impossible to create). + nullable: true, + heap_type: HeapType::Concrete(u.int_in_range(0..=type_ref_limit - 1)?), + })) + } else { + Ok(*u.choose(valtypes)?) + } +} diff --git a/crates/wasm-smith/src/core.rs b/crates/wasm-smith/src/core.rs index ab66d4450c..194d2101ae 100644 --- a/crates/wasm-smith/src/core.rs +++ b/crates/wasm-smith/src/core.rs @@ -9,6 +9,8 @@ use arbitrary::{Arbitrary, Result, Unstructured}; use code_builder::CodeBuilderAllocations; use flagset::{flags, FlagSet}; use std::collections::{HashMap, HashSet}; +use std::fmt; +use std::mem; use std::ops::Range; use std::rc::Rc; use std::str::{self, FromStr}; @@ -41,7 +43,6 @@ type Instruction = wasm_encoder::Instruction<'static>; /// To configure the shape of generated module, create a /// [`Config`][crate::Config] and then call [`Module::new`][crate::Module::new] /// with it. -#[derive(Debug)] pub struct Module { config: Config, duplicate_imports_behavior: DuplicateImportsBehavior, @@ -94,9 +95,8 @@ pub struct Module { /// aliased). num_defined_funcs: usize, - /// The number of tables defined in this module (not imported or - /// aliased). - num_defined_tables: usize, + /// Initialization expressions for all defined tables in this module. + defined_tables: Vec>, /// The number of memories defined in this module (not imported or /// aliased). @@ -104,7 +104,7 @@ pub struct Module { /// The indexes and initialization expressions of globals defined in this /// module. - defined_globals: Vec<(u32, GlobalInitExpr)>, + defined_globals: Vec<(u32, ConstExpr)>, /// All tags available to this module, sorted by their index. The list /// entry is the type of each tag. @@ -139,6 +139,13 @@ pub struct Module { /// Names currently exported from this module. export_names: HashSet, + + /// Reusable buffer in `self.arbitrary_const_expr` to amortize the cost of + /// allocation. + const_expr_choices: Vec Result>>, + + /// What the maximum type index that can be referenced is. + max_type_limit: MaxTypeLimit, } impl<'a> Arbitrary<'a> for Module { @@ -147,12 +154,27 @@ impl<'a> Arbitrary<'a> for Module { } } +impl fmt::Debug for Module { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Module") + .field("config", &self.config) + .field(&"...", &"...") + .finish() + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) enum DuplicateImportsBehavior { Allowed, Disallowed, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum MaxTypeLimit { + ModuleTypes, + Num(u32), +} + impl Module { /// Returns a reference to the internal configuration. pub fn config(&self) -> &Config { @@ -193,7 +215,7 @@ impl Module { num_imports: 0, num_defined_tags: 0, num_defined_funcs: 0, - num_defined_tables: 0, + defined_tables: Vec::new(), num_defined_memories: 0, defined_globals: Vec::new(), tags: Vec::new(), @@ -208,6 +230,8 @@ impl Module { data: Vec::new(), type_size: 0, export_names: HashSet::new(), + const_expr_choices: Vec::new(), + max_type_limit: MaxTypeLimit::ModuleTypes, } } } @@ -332,7 +356,7 @@ enum ElementKind { #[derive(Debug)] enum Elements { Functions(Vec), - Expressions(Vec>), + Expressions(Vec), } #[derive(Debug)] @@ -366,12 +390,6 @@ pub(crate) enum Offset { Global(u32), } -#[derive(Debug)] -pub(crate) enum GlobalInitExpr { - FuncRef(u32), - ConstExpr(ConstExpr), -} - impl Module { fn build(&mut self, u: &mut Unstructured) -> Result<()> { self.valtypes = configured_valtypes(&self.config); @@ -539,6 +557,8 @@ impl Module { fn arbitrary_rec_group(&mut self, u: &mut Unstructured) -> Result<()> { let rec_group_start = self.types.len(); + assert!(matches!(self.max_type_limit, MaxTypeLimit::ModuleTypes)); + if self.config.gc_enabled { // With small probability, clone an existing rec group. if self.clonable_rec_groups().next().is_some() && u.ratio(1, u8::MAX)? { @@ -549,16 +569,20 @@ impl Module { let max_rec_group_size = self.config.max_types - self.types.len(); let rec_group_size = u.int_in_range(0..=max_rec_group_size)?; let type_ref_limit = u32::try_from(self.types.len() + rec_group_size).unwrap(); + self.max_type_limit = MaxTypeLimit::Num(type_ref_limit); for _ in 0..rec_group_size { - let ty = self.arbitrary_sub_type(u, type_ref_limit)?; + let ty = self.arbitrary_sub_type(u)?; self.add_type(ty); } } else { let type_ref_limit = u32::try_from(self.types.len()).unwrap(); - let ty = self.arbitrary_sub_type(u, type_ref_limit)?; + self.max_type_limit = MaxTypeLimit::Num(type_ref_limit); + let ty = self.arbitrary_sub_type(u)?; self.add_type(ty); } + self.max_type_limit = MaxTypeLimit::ModuleTypes; + self.rec_groups.push(rec_group_start..self.types.len()); Ok(()) } @@ -592,40 +616,27 @@ impl Module { Ok(()) } - fn arbitrary_sub_type( - &mut self, - u: &mut Unstructured, - // NB: Types can be referenced up to (but not including) - // `type_ref_limit`. It is an exclusive bound to avoid an option: to - // disallow any reference to a concrete type (e.g. `(ref $type)`) you - // can simply pass `0` here. - type_ref_limit: u32, - ) -> Result { + fn arbitrary_sub_type(&mut self, u: &mut Unstructured) -> Result { if !self.config.gc_enabled { - debug_assert_eq!(type_ref_limit, u32::try_from(self.types.len()).unwrap()); return Ok(SubType { is_final: true, supertype: None, - composite_type: CompositeType::Func(self.arbitrary_func_type(u, type_ref_limit)?), + composite_type: CompositeType::Func(self.arbitrary_func_type(u)?), }); } if !self.can_subtype.is_empty() && u.ratio(1, 32_u8)? { - self.arbitrary_sub_type_of_super_type(u, type_ref_limit) + self.arbitrary_sub_type_of_super_type(u) } else { Ok(SubType { is_final: u.arbitrary()?, supertype: None, - composite_type: self.arbitrary_composite_type(u, type_ref_limit)?, + composite_type: self.arbitrary_composite_type(u)?, }) } } - fn arbitrary_sub_type_of_super_type( - &mut self, - u: &mut Unstructured, - type_ref_limit: u32, - ) -> Result { + fn arbitrary_sub_type_of_super_type(&mut self, u: &mut Unstructured) -> Result { let supertype = *u.choose(&self.can_subtype)?; let mut composite_type = self.types[usize::try_from(supertype).unwrap()] .composite_type @@ -638,7 +649,7 @@ impl Module { *f = self.arbitrary_matching_func_type(u, f)?; } CompositeType::Struct(s) => { - *s = self.arbitrary_matching_struct_type(u, s, type_ref_limit)?; + *s = self.arbitrary_matching_struct_type(u, s)?; } } Ok(SubType { @@ -652,7 +663,6 @@ impl Module { &mut self, u: &mut Unstructured, ty: &StructType, - type_ref_limit: u32, ) -> Result { let len_extra_fields = u.int_in_range(0..=5)?; let mut fields = Vec::with_capacity(ty.fields.len() + len_extra_fields); @@ -660,7 +670,7 @@ impl Module { fields.push(self.arbitrary_matching_field_type(u, *field)?); } for _ in 0..len_extra_fields { - fields.push(self.arbitrary_field_type(u, type_ref_limit)?); + fields.push(self.arbitrary_field_type(u)?); } Ok(StructType { fields: fields.into_boxed_slice(), @@ -892,71 +902,52 @@ impl Module { Ok(*u.choose(&choices)?) } - fn arbitrary_composite_type( - &mut self, - u: &mut Unstructured, - type_ref_limit: u32, - ) -> Result { + fn arbitrary_composite_type(&mut self, u: &mut Unstructured) -> Result { if !self.config.gc_enabled { - return Ok(CompositeType::Func( - self.arbitrary_func_type(u, type_ref_limit)?, - )); + return Ok(CompositeType::Func(self.arbitrary_func_type(u)?)); } match u.int_in_range(0..=2)? { 0 => Ok(CompositeType::Array(ArrayType( - self.arbitrary_field_type(u, type_ref_limit)?, + self.arbitrary_field_type(u)?, ))), - 1 => Ok(CompositeType::Func( - self.arbitrary_func_type(u, type_ref_limit)?, - )), - 2 => Ok(CompositeType::Struct( - self.arbitrary_struct_type(u, type_ref_limit)?, - )), + 1 => Ok(CompositeType::Func(self.arbitrary_func_type(u)?)), + 2 => Ok(CompositeType::Struct(self.arbitrary_struct_type(u)?)), _ => unreachable!(), } } - fn arbitrary_struct_type( - &mut self, - u: &mut Unstructured, - type_ref_limit: u32, - ) -> Result { + fn arbitrary_struct_type(&mut self, u: &mut Unstructured) -> Result { let len = u.int_in_range(0..=20)?; let mut fields = Vec::with_capacity(len); for _ in 0..len { - fields.push(self.arbitrary_field_type(u, type_ref_limit)?); + fields.push(self.arbitrary_field_type(u)?); } Ok(StructType { fields: fields.into_boxed_slice(), }) } - fn arbitrary_field_type( - &mut self, - u: &mut Unstructured, - type_ref_limit: u32, - ) -> Result { + fn arbitrary_field_type(&mut self, u: &mut Unstructured) -> Result { Ok(FieldType { - element_type: self.arbitrary_storage_type(u, type_ref_limit)?, + element_type: self.arbitrary_storage_type(u)?, mutable: u.arbitrary()?, }) } - fn arbitrary_storage_type( - &mut self, - u: &mut Unstructured, - type_ref_limit: u32, - ) -> Result { + fn arbitrary_storage_type(&mut self, u: &mut Unstructured) -> Result { match u.int_in_range(0..=2)? { 0 => Ok(StorageType::I8), 1 => Ok(StorageType::I16), - 2 => Ok(StorageType::Val(self.arbitrary_valtype(u, type_ref_limit)?)), + 2 => Ok(StorageType::Val(self.arbitrary_valtype(u)?)), _ => unreachable!(), } } fn arbitrary_ref_type(&self, u: &mut Unstructured) -> Result { + if !self.config.reference_types_enabled { + return Ok(RefType::FUNCREF); + } Ok(RefType { nullable: true, heap_type: self.arbitrary_heap_type(u)?, @@ -966,9 +957,13 @@ impl Module { fn arbitrary_heap_type(&self, u: &mut Unstructured) -> Result { assert!(self.config.reference_types_enabled); - if self.config.gc_enabled && !self.types.is_empty() && u.arbitrary()? { - let type_ref_limit = u32::try_from(self.types.len()).unwrap(); - let idx = u.int_in_range(0..=type_ref_limit)?; + let concrete_type_limit = match self.max_type_limit { + MaxTypeLimit::Num(n) => n, + MaxTypeLimit::ModuleTypes => u32::try_from(self.types.len()).unwrap(), + }; + + if self.config.gc_enabled && concrete_type_limit > 0 && u.arbitrary()? { + let idx = u.int_in_range(0..=concrete_type_limit - 1)?; return Ok(HeapType::Concrete(idx)); } @@ -995,22 +990,24 @@ impl Module { u.choose(&choices).copied() } - fn arbitrary_func_type( - &mut self, - u: &mut Unstructured, - type_ref_limit: u32, - ) -> Result> { - arbitrary_func_type( - u, - &self.config, - &self.valtypes, - if !self.config.multi_value_enabled { - Some(1) - } else { - None - }, - type_ref_limit, - ) + fn arbitrary_func_type(&mut self, u: &mut Unstructured) -> Result> { + let mut params = vec![]; + let mut results = vec![]; + let max_params = 20; + arbitrary_loop(u, 0, max_params, |u| { + params.push(self.arbitrary_valtype(u)?); + Ok(true) + })?; + let max_results = if self.config.multi_value_enabled { + max_params + } else { + 1 + }; + arbitrary_loop(u, 0, max_results, |u| { + results.push(self.arbitrary_valtype(u)?); + Ok(true) + })?; + Ok(Rc::new(FuncType { params, results })) } fn can_add_local_or_import_tag(&self) -> bool { @@ -1074,7 +1071,7 @@ impl Module { } if self.can_add_local_or_import_table() { choices.push(|u, m| { - let ty = arbitrary_table_type(u, m.config())?; + let ty = arbitrary_table_type(u, m.config(), Some(m))?; Ok(EntityType::Table(ty)) }); } @@ -1388,13 +1385,42 @@ impl Module { .filter(move |i| self.func_type(*i).results.is_empty()) } - fn arbitrary_valtype(&self, u: &mut Unstructured, type_ref_limit: u32) -> Result { - arbitrary_valtype(u, &self.config, &self.valtypes, type_ref_limit) + fn arbitrary_valtype(&self, u: &mut Unstructured) -> Result { + #[derive(Arbitrary)] + enum ValTypeClass { + I32, + I64, + F32, + F64, + V128, + Ref, + } + + match u.arbitrary::()? { + ValTypeClass::I32 => Ok(ValType::I32), + ValTypeClass::I64 => Ok(ValType::I64), + ValTypeClass::F32 => Ok(ValType::F32), + ValTypeClass::F64 => Ok(ValType::F64), + ValTypeClass::V128 => { + if self.config.simd_enabled { + Ok(ValType::V128) + } else { + Ok(ValType::I32) + } + } + ValTypeClass::Ref => { + if self.config.reference_types_enabled { + Ok(ValType::Ref(self.arbitrary_ref_type(u)?)) + } else { + Ok(ValType::I32) + } + } + } } fn arbitrary_global_type(&self, u: &mut Unstructured) -> Result { Ok(GlobalType { - val_type: self.arbitrary_valtype(u, u32::try_from(self.types.len()).unwrap())?, + val_type: self.arbitrary_valtype(u)?, mutable: u.arbitrary()?, }) } @@ -1447,14 +1473,38 @@ impl Module { if !self.can_add_local_or_import_table() { return Ok(false); } - self.num_defined_tables += 1; - let ty = arbitrary_table_type(u, self.config())?; + let ty = arbitrary_table_type(u, self.config(), Some(self))?; + let init = self.arbitrary_table_init(u, ty.element_type)?; + self.defined_tables.push(init); self.tables.push(ty); Ok(true) }, ) } + /// Generates an arbitrary table initialization expression for a table whose + /// element type is `ty`. + /// + /// Table initialization expressions were added by the GC proposal to + /// initialize non-nullable tables. + fn arbitrary_table_init( + &mut self, + u: &mut Unstructured, + ty: RefType, + ) -> Result> { + if !self.config.gc_enabled { + assert!(ty.nullable); + return Ok(None); + } + // Even with the GC proposal an initialization expression is not + // required if the element type is nullable. + if ty.nullable && u.arbitrary()? { + return Ok(None); + } + let expr = self.arbitrary_const_expr(ValType::Ref(ty), u)?; + Ok(Some(expr)) + } + fn arbitrary_memories(&mut self, u: &mut Unstructured) -> Result<()> { arbitrary_loop( u, @@ -1472,64 +1522,87 @@ impl Module { } /// Add a new global of the given type and return its global index. - /// `choices` can be reused in a loop to avoid allocating a new `Vec` each - /// time. fn add_arbitrary_global_of_type( &mut self, ty: GlobalType, - num_imported_globals: usize, u: &mut Unstructured, - choices: &mut Vec Result>>, ) -> Result { + let expr = self.arbitrary_const_expr(ty.val_type, u)?; + let global_idx = self.globals.len() as u32; + self.globals.push(ty); + self.defined_globals.push((global_idx, expr)); + Ok(global_idx) + } + + /// Generates an arbitrary constant expression of the type `ty`. + fn arbitrary_const_expr(&mut self, ty: ValType, u: &mut Unstructured) -> Result { + let mut choices = mem::take(&mut self.const_expr_choices); choices.clear(); let num_funcs = self.funcs.len() as u32; - choices.push(Box::new(move |u, ty| { - Ok(GlobalInitExpr::ConstExpr(match ty { - ValType::I32 => ConstExpr::i32_const(u.arbitrary()?), - ValType::I64 => ConstExpr::i64_const(u.arbitrary()?), - ValType::F32 => ConstExpr::f32_const(u.arbitrary()?), - ValType::F64 => ConstExpr::f64_const(u.arbitrary()?), - ValType::V128 => ConstExpr::v128_const(u.arbitrary()?), - ValType::Ref(ty) => { - assert!(ty.nullable); - if ty.heap_type == HeapType::Func && num_funcs > 0 && u.arbitrary()? { - let func = u.int_in_range(0..=num_funcs - 1)?; - return Ok(GlobalInitExpr::FuncRef(func)); - } - ConstExpr::ref_null(ty.heap_type) + + // MVP wasm can `global.get` any immutable imported global in a + // constant expression, and the GC proposal enables this for all + // globals, so make all matching globals a candidate. + for i in self.globals_for_const_expr(ty) { + choices.push(Box::new(move |_, _| Ok(ConstExpr::global_get(i)))); + } + + // Another option for all types is to have an actual value of each type. + // Change `ty` to any valid subtype of `ty` and then generate a matching + // type of that value. + let ty = self.arbitrary_matching_val_type(u, ty)?; + match ty { + ValType::I32 => choices.push(Box::new(|u, _| Ok(ConstExpr::i32_const(u.arbitrary()?)))), + ValType::I64 => choices.push(Box::new(|u, _| Ok(ConstExpr::i64_const(u.arbitrary()?)))), + ValType::F32 => choices.push(Box::new(|u, _| Ok(ConstExpr::f32_const(u.arbitrary()?)))), + ValType::F64 => choices.push(Box::new(|u, _| Ok(ConstExpr::f64_const(u.arbitrary()?)))), + ValType::V128 => { + choices.push(Box::new(|u, _| Ok(ConstExpr::v128_const(u.arbitrary()?)))) + } + + ValType::Ref(ty) => { + if ty.nullable { + choices.push(Box::new(move |_, _| Ok(ConstExpr::ref_null(ty.heap_type)))); } - })) - })); - for (i, g) in self.globals[..num_imported_globals].iter().enumerate() { - if !g.mutable && g.val_type == ty.val_type { - choices.push(Box::new(move |_, _| { - Ok(GlobalInitExpr::ConstExpr(ConstExpr::global_get(i as u32))) - })); + match ty.heap_type { + HeapType::Func if num_funcs > 0 => { + choices.push(Box::new(move |u, _| { + let func = u.int_in_range(0..=num_funcs - 1)?; + Ok(ConstExpr::ref_func(func)) + })); + } + + HeapType::Concrete(ty) => { + for (i, fty) in self.funcs.iter().map(|(t, _)| *t).enumerate() { + if ty != fty { + continue; + } + choices.push(Box::new(move |_, _| Ok(ConstExpr::ref_func(i as u32)))); + } + } + + // TODO: fill out more GC types e.g `array.new` and + // `struct.new` + _ => {} + } } } let f = u.choose(&choices)?; - let expr = f(u, ty.val_type)?; - let global_idx = self.globals.len() as u32; - self.globals.push(ty); - self.defined_globals.push((global_idx, expr)); - - Ok(global_idx) + let ret = f(u, ty); + self.const_expr_choices = choices; + ret } fn arbitrary_globals(&mut self, u: &mut Unstructured) -> Result<()> { - let mut choices: Vec Result>> = - vec![]; - let num_imported_globals = self.globals.len(); - arbitrary_loop(u, self.config.min_globals, self.config.max_globals, |u| { if !self.can_add_local_or_import_global() { return Ok(false); } let ty = self.arbitrary_global_type(u)?; - self.add_arbitrary_global_of_type(ty, num_imported_globals, u, &mut choices)?; + self.add_arbitrary_global_of_type(ty, u)?; Ok(true) }) @@ -1617,8 +1690,6 @@ impl Module { } // For each export, add necessary prerequisites to the module. - let mut choices = vec![]; - let num_imported_globals = self.globals.len() - self.defined_globals.len(); for export in required_exports { let new_index = match exports_types .entity_type_from_export(&export) @@ -1684,9 +1755,7 @@ impl Module { val_type: convert_val_type(&global_type.content_type), mutable: global_type.mutable, }, - num_imported_globals, u, - &mut choices, )?, wasmparser::types::EntityType::Table(_) | wasmparser::types::EntityType::Memory(_) @@ -1809,24 +1878,17 @@ impl Module { } fn arbitrary_elems(&mut self, u: &mut Unstructured) -> Result<()> { - let func_max = self.funcs.len() as u32; - // Create a helper closure to choose an arbitrary offset. let mut offset_global_choices = vec![]; if !self.config.disallow_traps { - for (i, g) in self.globals[..self.globals.len() - self.defined_globals.len()] - .iter() - .enumerate() - { - if !g.mutable && g.val_type == ValType::I32 { - offset_global_choices.push(i as u32); - } + for i in self.globals_for_const_expr(ValType::I32) { + offset_global_choices.push(i); } } + let disallow_traps = self.config.disallow_traps; let arbitrary_active_elem = |u: &mut Unstructured, min_mem_size: u32, table: Option, - disallow_traps: bool, table_ty: &TableType| { let (offset, max_size_hint) = if !offset_global_choices.is_empty() && u.arbitrary()? { let g = u.choose(&offset_global_choices)?; @@ -1851,11 +1913,20 @@ impl Module { Ok((ElementKind::Active { table, offset }, max_size_hint)) }; + // Generate a list of candidates for "kinds" of elements segments. For + // example we can have an active segment for any existing table or + // passive/declared segments if the right wasm features are enabled. type GenElemSegment<'a> = dyn Fn(&mut Unstructured) -> Result<(ElementKind, Option)> + 'a; - let mut funcrefs: Vec> = Vec::new(); - let mut externrefs: Vec> = Vec::new(); - let disallow_traps = self.config.disallow_traps; + let mut choices: Vec> = Vec::new(); + + // Bulk memory enables passive/declared segments, and note that the + // types used are selected later. + if self.config.bulk_memory_enabled { + choices.push(Box::new(|_| Ok((ElementKind::Passive, None)))); + choices.push(Box::new(|_| Ok((ElementKind::Declared, None)))); + } + for (i, ty) in self.tables.iter().enumerate() { // If this table starts with no capacity then any non-empty element // segment placed onto it will immediately trap, which isn't too @@ -1865,93 +1936,100 @@ impl Module { continue; } - let dst = if ty.element_type == RefType::FUNCREF { - &mut funcrefs - } else { - &mut externrefs - }; let minimum = ty.minimum; // If the first table is a funcref table then it's a candidate for // the MVP encoding of element segments. + let ty = *ty; if i == 0 && ty.element_type == RefType::FUNCREF { - dst.push(Box::new(move |u| { - arbitrary_active_elem(u, minimum, None, disallow_traps, ty) + choices.push(Box::new(move |u| { + arbitrary_active_elem(u, minimum, None, &ty) })); } if self.config.bulk_memory_enabled { let idx = Some(i as u32); - dst.push(Box::new(move |u| { - arbitrary_active_elem(u, minimum, idx, disallow_traps, ty) + choices.push(Box::new(move |u| { + arbitrary_active_elem(u, minimum, idx, &ty) })); } } - // Bulk memory enables passive/declared segments for funcrefs, and - // reference types additionally enables the segments for externrefs. - if self.config.bulk_memory_enabled { - funcrefs.push(Box::new(|_| Ok((ElementKind::Passive, None)))); - funcrefs.push(Box::new(|_| Ok((ElementKind::Declared, None)))); - if self.config.reference_types_enabled { - externrefs.push(Box::new(|_| Ok((ElementKind::Passive, None)))); - externrefs.push(Box::new(|_| Ok((ElementKind::Declared, None)))); - } - } - - let mut choices = Vec::new(); - if !funcrefs.is_empty() { - choices.push((&funcrefs, RefType::FUNCREF)); - } - if !externrefs.is_empty() { - choices.push((&externrefs, RefType::EXTERNREF)); - } - if choices.is_empty() { return Ok(()); } + arbitrary_loop( u, self.config.min_element_segments, self.config.max_element_segments, |u| { - // Choose whether to generate a segment whose elements are initialized via - // expressions, or one whose elements are initialized via function indices. - let (kind_candidates, ty) = *u.choose(&choices)?; - - // Select a kind for this segment now that we know the number of - // items the segment will hold. - let (kind, max_size_hint) = u.choose(kind_candidates)?(u)?; + // Pick a kind of element segment to generate which will also + // give us a hint of the maximum size, if any. + let (kind, max_size_hint) = u.choose(&choices)?(u)?; let max = max_size_hint .map(|i| usize::try_from(i).unwrap()) .unwrap_or_else(|| self.config.max_elements); - // Pick whether we're going to use expression elements or - // indices. Note that externrefs must use expressions, - // and functions without reference types must use indices. - let items = if ty == RefType::EXTERNREF - || (self.config.reference_types_enabled && u.arbitrary()?) + // Infer, from the kind of segment, the type of the element + // segment. Passive/declared segments can be declared with any + // reference type, but active segments must match their table. + let ty = match kind { + ElementKind::Passive | ElementKind::Declared => self.arbitrary_ref_type(u)?, + ElementKind::Active { table, .. } => { + let idx = table.unwrap_or(0); + self.arbitrary_matching_ref_type(u, self.tables[idx as usize].element_type)? + } + }; + + // The `Elements::Functions` encoding is only possible when the + // element type is a `funcref` because the binary format can't + // allow encoding any other type in that form. + let can_use_function_list = ty == RefType::FUNCREF; + if !self.config.reference_types_enabled { + assert!(can_use_function_list); + } + + // If a function list is possible then build up a list of + // functions that can be selected from. + let mut func_candidates = Vec::new(); + if can_use_function_list { + match ty.heap_type { + HeapType::Func => { + func_candidates.extend(0..self.funcs.len() as u32); + } + HeapType::Concrete(ty) => { + for (i, (fty, _)) in self.funcs.iter().enumerate() { + if *fty == ty { + func_candidates.push(i as u32); + } + } + } + _ => {} + } + } + + // And finally actually generate the arbitrary elements of this + // element segment. Function indices are used if they're either + // forced or allowed, and otherwise expressions are used + // instead. + let items = if !self.config.reference_types_enabled + || (can_use_function_list && u.arbitrary()?) { let mut init = vec![]; - arbitrary_loop(u, self.config.min_elements, max, |u| { - init.push( - if ty == RefType::EXTERNREF || func_max == 0 || u.arbitrary()? { - None - } else { - Some(u.int_in_range(0..=func_max - 1)?) - }, - ); - Ok(true) - })?; - Elements::Expressions(init) - } else { - let mut init = vec![]; - if func_max > 0 { + if func_candidates.len() > 0 { arbitrary_loop(u, self.config.min_elements, max, |u| { - let func_idx = u.int_in_range(0..=func_max - 1)?; + let func_idx = *u.choose(&func_candidates)?; init.push(func_idx); Ok(true) })?; } Elements::Functions(init) + } else { + let mut init = vec![]; + arbitrary_loop(u, self.config.min_elements, max, |u| { + init.push(self.arbitrary_const_expr(ValType::Ref(ty), u)?); + Ok(true) + })?; + Elements::Expressions(init) }; self.elems.push(ElementSegment { kind, ty, items }); @@ -1994,7 +2072,7 @@ impl Module { fn arbitrary_locals(&self, u: &mut Unstructured) -> Result> { let mut ret = Vec::new(); arbitrary_loop(u, 0, 100, |u| { - ret.push(self.arbitrary_valtype(u, u32::try_from(self.types.len()).unwrap())?); + ret.push(self.arbitrary_valtype(u)?); Ok(true) })?; Ok(ret) @@ -2029,18 +2107,11 @@ impl Module { )) })); if !self.config.disallow_traps { - for (i, g) in self.globals[..self.globals.len() - self.defined_globals.len()] - .iter() - .enumerate() - { - if g.mutable { - continue; - } - if g.val_type == ValType::I32 { - choices32.push(Box::new(move |_, _, _| Ok(Offset::Global(i as u32)))); - } else if g.val_type == ValType::I64 { - choices64.push(Box::new(move |_, _, _| Ok(Offset::Global(i as u32)))); - } + for i in self.globals_for_const_expr(ValType::I32) { + choices32.push(Box::new(move |_, _, _| Ok(Offset::Global(i)))); + } + for i in self.globals_for_const_expr(ValType::I64) { + choices64.push(Box::new(move |_, _, _| Ok(Offset::Global(i)))); } } @@ -2126,6 +2197,33 @@ impl Module { } } } + + /// Returns an iterator of all globals which can be used in constant + /// expressions for a value of type `ty` specified. + fn globals_for_const_expr(&self, ty: ValType) -> impl Iterator + '_ { + // Before the GC proposal only imported globals could be referenced, but + // the GC proposal relaxed this feature to allow any global. + let num_imported_globals = self.globals.len() - self.defined_globals.len(); + let max_global = if self.config.gc_enabled { + self.globals.len() + } else { + num_imported_globals + }; + + self.globals[..max_global] + .iter() + .enumerate() + .filter_map(move |(i, g)| { + // Mutable globals cannot participate in constant expressions, + // but otherwise so long as the global is a subtype of the + // desired type it's a candidate. + if !g.mutable && self.val_type_is_sub_type(g.val_type, ty) { + Some(i as u32) + } else { + None + } + }) + } } pub(crate) fn arbitrary_limits32( @@ -2208,49 +2306,11 @@ pub(crate) fn configured_valtypes(config: &Config) -> Vec { valtypes } -pub(crate) fn arbitrary_func_type( +pub(crate) fn arbitrary_table_type( u: &mut Unstructured, config: &Config, - valtypes: &[ValType], - max_results: Option, - type_ref_limit: u32, -) -> Result> { - let mut params = vec![]; - let mut results = vec![]; - arbitrary_loop(u, 0, 20, |u| { - params.push(arbitrary_valtype(u, config, valtypes, type_ref_limit)?); - Ok(true) - })?; - arbitrary_loop(u, 0, max_results.unwrap_or(20), |u| { - results.push(arbitrary_valtype(u, config, valtypes, type_ref_limit)?); - Ok(true) - })?; - Ok(Rc::new(FuncType { params, results })) -} - -fn arbitrary_valtype( - u: &mut Unstructured, - config: &Config, - valtypes: &[ValType], - type_ref_limit: u32, -) -> Result { - if config.gc_enabled && type_ref_limit > 0 && u.ratio(1, 20)? { - Ok(ValType::Ref(RefType { - // TODO: For now, only create allow nullable reference - // types. Eventually we should support non-nullable reference types, - // but this means that we will also need to recognize when it is - // impossible to create an instance of the reference (eg `(ref - // nofunc)` has no instances, and self-referential types that - // contain a non-null self-reference are also impossible to create). - nullable: true, - heap_type: HeapType::Concrete(u.int_in_range(0..=type_ref_limit - 1)?), - })) - } else { - Ok(*u.choose(valtypes)?) - } -} - -pub(crate) fn arbitrary_table_type(u: &mut Unstructured, config: &Config) -> Result { + module: Option<&Module>, +) -> Result { // We don't want to generate tables that are too large on average, so // keep the "inbounds" limit here a bit smaller. let max_inbounds = 10_000; @@ -2266,12 +2326,12 @@ pub(crate) fn arbitrary_table_type(u: &mut Unstructured, config: &Config) -> Res if config.disallow_traps { assert!(minimum > 0); } + let element_type = match module { + Some(module) => module.arbitrary_ref_type(u)?, + None => RefType::FUNCREF, + }; Ok(TableType { - element_type: if config.reference_types_enabled { - *u.choose(&[RefType::FUNCREF, RefType::EXTERNREF])? - } else { - RefType::FUNCREF - }, + element_type, minimum, maximum, }) diff --git a/crates/wasm-smith/src/core/code_builder.rs b/crates/wasm-smith/src/core/code_builder.rs index a247d74748..a55c5aafda 100644 --- a/crates/wasm-smith/src/core/code_builder.rs +++ b/crates/wasm-smith/src/core/code_builder.rs @@ -1,6 +1,6 @@ use super::{ - CompositeType, Elements, FuncType, GlobalInitExpr, Instruction, InstructionKind::*, - InstructionKinds, Module, ValType, + CompositeType, Elements, FuncType, Instruction, InstructionKind::*, InstructionKinds, Module, + ValType, }; use crate::{unique_string, MemoryOffsetChoices}; use arbitrary::{Result, Unstructured}; @@ -744,14 +744,14 @@ impl CodeBuilderAllocations { let mut referenced_functions = BTreeSet::new(); for (_, expr) in module.defined_globals.iter() { - if let GlobalInitExpr::FuncRef(i) = *expr { + if let Some(i) = expr.get_ref_func() { referenced_functions.insert(i); } } for g in module.elems.iter() { match &g.items { Elements::Expressions(e) => { - let iter = e.iter().filter_map(|i| *i); + let iter = e.iter().filter_map(|e| e.get_ref_func()); referenced_functions.extend(iter); } Elements::Functions(e) => { @@ -871,7 +871,6 @@ impl CodeBuilderAllocations { val_type: ty, mutable: true, }); - let init = GlobalInitExpr::ConstExpr(init); module.defined_globals.push((global_idx, init)); if self.disallow_exporting || u.ratio(1, 100).unwrap_or(false) { @@ -1155,12 +1154,7 @@ impl CodeBuilder<'_> { fn arbitrary_block_type(&self, u: &mut Unstructured, module: &Module) -> Result { let mut options: Vec Result>> = vec![ Box::new(|_| Ok(BlockType::Empty)), - Box::new(|u| { - Ok(BlockType::Result(module.arbitrary_valtype( - u, - u32::try_from(module.types.len()).unwrap(), - )?)) - }), + Box::new(|u| Ok(BlockType::Result(module.arbitrary_valtype(u)?))), ]; if module.config.multi_value_enabled { for (i, ty) in module.func_types() { diff --git a/crates/wasm-smith/src/core/encode.rs b/crates/wasm-smith/src/core/encode.rs index 4ed47ce025..5a661509a6 100644 --- a/crates/wasm-smith/src/core/encode.rs +++ b/crates/wasm-smith/src/core/encode.rs @@ -121,12 +121,22 @@ impl Module { } fn encode_tables(&self, module: &mut wasm_encoder::Module) { - if self.num_defined_tables == 0 { + if self.defined_tables.is_empty() { return; } let mut tables = wasm_encoder::TableSection::new(); - for t in self.tables[self.tables.len() - self.num_defined_tables..].iter() { - tables.table(*t); + for (t, init) in self.tables[self.tables.len() - self.defined_tables.len()..] + .iter() + .zip(&self.defined_tables) + { + match init { + Some(init) => { + tables.table_with_init(*t, init); + } + None => { + tables.table(*t); + } + } } module.section(&tables); } @@ -149,10 +159,7 @@ impl Module { let mut globals = wasm_encoder::GlobalSection::new(); for (idx, expr) in &self.defined_globals { let ty = &self.globals[*idx as usize]; - match expr { - GlobalInitExpr::ConstExpr(expr) => globals.global(*ty, expr), - GlobalInitExpr::FuncRef(func) => globals.global(*ty, &ConstExpr::ref_func(*func)), - }; + globals.global(*ty, expr); } module.section(&globals); } @@ -179,24 +186,13 @@ impl Module { return; } let mut elems = wasm_encoder::ElementSection::new(); - let mut exps = vec![]; for el in &self.elems { let elements = match &el.items { - Elements::Expressions(es) => { - exps.clear(); - exps.extend(es.iter().map(|e| { - // TODO(nagisa): generate global.get of imported ref globals too. - match e { - Some(i) => match el.ty { - RefType::FUNCREF => wasm_encoder::ConstExpr::ref_func(*i), - _ => unreachable!(), - }, - None => wasm_encoder::ConstExpr::ref_null(el.ty.heap_type), - } - })); - wasm_encoder::Elements::Expressions(el.ty, &exps) + Elements::Expressions(es) => wasm_encoder::Elements::Expressions(el.ty, es), + Elements::Functions(fs) => { + assert_eq!(el.ty, RefType::FUNCREF); + wasm_encoder::Elements::Functions(fs) } - Elements::Functions(fs) => wasm_encoder::Elements::Functions(fs), }; match &el.kind { ElementKind::Active { table, offset } => { diff --git a/crates/wasm-smith/src/core/terminate.rs b/crates/wasm-smith/src/core/terminate.rs index 72e7078156..7983c35be6 100644 --- a/crates/wasm-smith/src/core/terminate.rs +++ b/crates/wasm-smith/src/core/terminate.rs @@ -1,6 +1,5 @@ use super::*; use anyhow::{bail, Result}; -use std::mem; impl Module { /// Ensure that all of this Wasm module's functions will terminate when @@ -25,10 +24,8 @@ impl Module { val_type: ValType::I32, mutable: true, }); - self.defined_globals.push(( - fuel_global, - GlobalInitExpr::ConstExpr(ConstExpr::i32_const(default_fuel as i32)), - )); + self.defined_globals + .push((fuel_global, ConstExpr::i32_const(default_fuel as i32))); for code in &mut self.code { let check_fuel = |insts: &mut Vec| { diff --git a/crates/wasm-smith/tests/core.rs b/crates/wasm-smith/tests/core.rs index 36db8adb0e..7286952dc6 100644 --- a/crates/wasm-smith/tests/core.rs +++ b/crates/wasm-smith/tests/core.rs @@ -128,6 +128,7 @@ fn smoke_test_wasm_gc() { let mut u = Unstructured::new(&buf); let config = Config { gc_enabled: true, + reference_types_enabled: true, ..Config::default() }; if let Ok(module) = Module::new(config, &mut u) {