diff --git a/.github/scripts/ci-test.sh b/.github/scripts/ci-test.sh index ad6b1127a0..041a4bb816 100755 --- a/.github/scripts/ci-test.sh +++ b/.github/scripts/ci-test.sh @@ -37,12 +37,7 @@ for fn in $(ls src/tests/*.rs); do # Run the test with each plan it needs. for MMTK_PLAN in $PLANS; do - # Deal with mark sweep specially, we only have malloc mark sweep, and we need to enable the feature to make it work. - if [[ $MMTK_PLAN == 'MarkSweep' ]]; then - env MMTK_PLAN=$MMTK_PLAN cargo test --features "malloc_mark_sweep,$FEATURES" -- $t; - else - env MMTK_PLAN=$MMTK_PLAN cargo test --features "$FEATURES" -- $t; - fi + env MMTK_PLAN=$MMTK_PLAN cargo test --features "$FEATURES" -- $t; done done diff --git a/Cargo.toml b/Cargo.toml index cea5749315..d329b57bda 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ atomic_refcell = "0.1.7" strum = "0.24" strum_macros = "0.24" cfg-if = "1.0" +itertools = "0.10.5" [dev-dependencies] rand = "0.7.3" @@ -115,11 +116,6 @@ work_packet_stats = [] # Count the malloc'd memory into the heap size malloc_counted_size = [] -# Use library malloc as the freelist allocator for mark sweep. This will makes mark sweep slower. As malloc may return addresses outside our -# normal heap range, we will have to use chunk-based SFT table. Turning on this feature will use a different SFT map implementation on 64bits, -# and will affect all the plans in the build. Please be aware of the consequence, and this is only meant to be experimental use. -malloc_mark_sweep = [] - # Do not modify the following line - ci-common.sh matches it # -- Mutally exclusive features -- # Only one feature from each group can be provided. Otherwise build will fail. @@ -131,6 +127,18 @@ malloc_mark_sweep = [] malloc_mimalloc = ["mimalloc-sys"] malloc_jemalloc = ["jemalloc-sys"] malloc_hoard = ["hoard-sys"] +# Use the native mimalloc allocator for malloc. This is not tested by me (Yi) yet, and it is only used to make sure that some code +# is not compiled in default builds. +malloc_native_mimalloc = [] # If there are more groups, they should be inserted above this line # Group:end + +# Group:marksweepallocation +# default is native allocator with lazy sweeping +eager_sweeping = [] +# Use library malloc as the freelist allocator for mark sweep. This will makes mark sweep slower. As malloc may return addresses outside our +# normal heap range, we will have to use chunk-based SFT table. Turning on this feature will use a different SFT map implementation on 64bits, +# and will affect all the plans in the build. Please be aware of the consequence, and this is only meant to be experimental use. +malloc_mark_sweep = [] +# Group:end diff --git a/src/memory_manager.rs b/src/memory_manager.rs index 182d0d0322..5310aa4995 100644 --- a/src/memory_manager.rs +++ b/src/memory_manager.rs @@ -84,8 +84,11 @@ pub fn mmtk_init(builder: &MMTKBuilder) -> Box> { Box::new(mmtk) } -/// Request MMTk to create a mutator for the given thread. For performance reasons, A VM should -/// store the returned mutator in a thread local storage that can be accessed efficiently. +/// Request MMTk to create a mutator for the given thread. The ownership +/// of returned boxed mutator is transferred to the binding, and the binding needs to take care of its +/// lifetime. For performance reasons, A VM should store the returned mutator in a thread local storage +/// that can be accessed efficiently. 
A VM may also copy and embed the mutator stucture to a thread-local data +/// structure, and use that as a reference to the mutator (it is okay to drop the box once the content is copied). /// /// Arguments: /// * `mmtk`: A reference to an MMTk instance. @@ -103,12 +106,14 @@ pub fn bind_mutator( mutator } -/// Reclaim a mutator that is no longer needed. +/// Report to MMTk that a mutator is no longer needed. A binding should not attempt +/// to use the mutator after this call. MMTk will not attempt to reclaim the memory for the +/// mutator, so a binding should properly reclaim the memory for the mutator after this call. /// /// Arguments: /// * `mutator`: A reference to the mutator to be destroyed. -pub fn destroy_mutator(mutator: Box>) { - drop(mutator); +pub fn destroy_mutator(mutator: &mut Mutator) { + mutator.on_destroy(); } /// Flush the mutator's local states. @@ -144,6 +149,12 @@ pub fn alloc( // If you plan to use MMTk with a VM with its object size smaller than MMTk's min object size, you should // meet the min object size in the fastpath. debug_assert!(size >= MIN_OBJECT_SIZE); + // Assert alignment + debug_assert!(align >= VM::MIN_ALIGNMENT); + debug_assert!(align <= VM::MAX_ALIGNMENT); + // Assert offset + debug_assert!(VM::USE_ALLOCATION_OFFSET || offset == 0); + mutator.alloc(size, align, offset, semantics) } diff --git a/src/plan/global.rs b/src/plan/global.rs index 840f1046dd..8a51c99cf6 100644 --- a/src/plan/global.rs +++ b/src/plan/global.rs @@ -81,7 +81,7 @@ pub fn create_plan( vm_map, mmapper, options, scheduler, )) as Box>, PlanSelector::MarkSweep => Box::new(crate::plan::marksweep::MarkSweep::new( - vm_map, mmapper, options, + vm_map, mmapper, options, scheduler, )) as Box>, PlanSelector::Immix => Box::new(crate::plan::immix::Immix::new( vm_map, mmapper, options, scheduler, diff --git a/src/plan/marksweep/gc_work.rs b/src/plan/marksweep/gc_work.rs index 01933edebd..90ed731dd6 100644 --- a/src/plan/marksweep/gc_work.rs +++ b/src/plan/marksweep/gc_work.rs @@ -1,77 +1,9 @@ -use crate::policy::mallocspace::metadata::is_chunk_mapped; -use crate::policy::mallocspace::metadata::is_chunk_marked_unsafe; -use crate::policy::mallocspace::MallocSpace; -use crate::scheduler::{GCWork, GCWorker, WorkBucketStage}; -use crate::util::heap::layout::vm_layout_constants::BYTES_IN_CHUNK; -use crate::util::Address; -use crate::vm::VMBinding; -use crate::MMTK; -use std::sync::atomic::Ordering; - use super::MarkSweep; - -/// Simple work packet that just sweeps a single chunk -pub struct MSSweepChunk { - ms: &'static MallocSpace, - // starting address of a chunk - chunk: Address, -} - -impl GCWork for MSSweepChunk { - #[inline] - fn do_work(&mut self, _worker: &mut GCWorker, _mmtk: &'static MMTK) { - self.ms.sweep_chunk(self.chunk); - } -} - -/// Work packet that generates sweep jobs for gc workers. 
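// Hedged sketch (hypothetical binding-side code, not part of this patch) of the new
// `bind_mutator`/`destroy_mutator` contract documented in memory_manager.rs above: the
// binding owns the boxed mutator and reclaims its memory itself after informing MMTk.
// `MyVM` stands in for the binding's `VMBinding` implementation.
fn vm_thread_exit(mut mutator: Box<Mutator<MyVM>>) {
    // Let MMTk run per-allocator teardown (e.g. return free-list blocks to their space).
    memory_manager::destroy_mutator(mutator.as_mut());
    // MMTk no longer frees the mutator itself; dropping the Box reclaims it on the binding side.
    drop(mutator);
}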
Each chunk is given its own work packet -pub struct MSSweepChunks { - plan: &'static MarkSweep, -} - -impl MSSweepChunks { - pub fn new(plan: &'static MarkSweep) -> Self { - Self { plan } - } -} - -impl GCWork for MSSweepChunks { - #[inline] - fn do_work(&mut self, _worker: &mut GCWorker, mmtk: &'static MMTK) { - let ms = self.plan.ms_space(); - let mut work_packets: Vec>> = vec![]; - let mut chunk = unsafe { Address::from_usize(ms.chunk_addr_min.load(Ordering::Relaxed)) }; // XXX: have to use AtomicUsize to represent an Address - let end = unsafe { Address::from_usize(ms.chunk_addr_max.load(Ordering::Relaxed)) } - + BYTES_IN_CHUNK; - - // Since only a single thread generates the sweep work packets as well as it is a Stop-the-World collector, - // we can assume that the chunk mark metadata is not being accessed by anything else and hence we use - // non-atomic accesses - while chunk < end { - if is_chunk_mapped(chunk) && unsafe { is_chunk_marked_unsafe(chunk) } { - work_packets.push(Box::new(MSSweepChunk { ms, chunk })); - } - - chunk += BYTES_IN_CHUNK; - } - - debug!("Generated {} sweep work packets", work_packets.len()); - #[cfg(debug_assertions)] - { - ms.total_work_packets - .store(work_packets.len() as u32, Ordering::SeqCst); - ms.completed_work_packets.store(0, Ordering::SeqCst); - ms.work_live_bytes.store(0, Ordering::SeqCst); - } - - mmtk.scheduler.work_buckets[WorkBucketStage::Release].bulk_add(work_packets); - } -} - -pub struct MSGCWorkContext(std::marker::PhantomData); use crate::policy::gc_work::DEFAULT_TRACE; -use crate::scheduler::gc_work::PlanProcessEdges; +use crate::scheduler::gc_work::*; +use crate::vm::VMBinding; +pub struct MSGCWorkContext(std::marker::PhantomData); impl crate::scheduler::GCWorkContext for MSGCWorkContext { type VM = VM; type PlanType = MarkSweep; diff --git a/src/plan/marksweep/global.rs b/src/plan/marksweep/global.rs index 8029a13b79..40d272d70a 100644 --- a/src/plan/marksweep/global.rs +++ b/src/plan/marksweep/global.rs @@ -1,37 +1,42 @@ use crate::plan::global::BasePlan; use crate::plan::global::CommonPlan; use crate::plan::global::GcStatus; -use crate::plan::marksweep::gc_work::{MSGCWorkContext, MSSweepChunks}; +use crate::plan::marksweep::gc_work::MSGCWorkContext; use crate::plan::marksweep::mutator::ALLOCATOR_MAPPING; use crate::plan::AllocationSemantics; use crate::plan::Plan; use crate::plan::PlanConstraints; -use crate::policy::mallocspace::metadata::ACTIVE_CHUNK_METADATA_SPEC; -use crate::policy::mallocspace::MallocSpace; use crate::policy::space::Space; -use crate::scheduler::*; +use crate::scheduler::GCWorkScheduler; use crate::util::alloc::allocators::AllocatorSelector; -#[cfg(not(feature = "global_alloc_bit"))] -use crate::util::alloc_bit::ALLOC_SIDE_METADATA_SPEC; use crate::util::heap::layout::heap_layout::Mmapper; use crate::util::heap::layout::heap_layout::VMMap; use crate::util::heap::HeapMeta; +use crate::util::heap::VMRequest; use crate::util::metadata::side_metadata::{SideMetadataContext, SideMetadataSanity}; use crate::util::options::Options; use crate::util::VMWorkerThread; use crate::vm::VMBinding; +use enum_map::EnumMap; +use mmtk_macros::PlanTraceObject; use std::sync::Arc; -use enum_map::EnumMap; +#[cfg(feature = "malloc_mark_sweep")] +pub type MarkSweepSpace = crate::policy::marksweepspace::malloc_ms::MallocSpace; +#[cfg(feature = "malloc_mark_sweep")] +use crate::policy::marksweepspace::malloc_ms::MAX_OBJECT_SIZE; -use mmtk_macros::PlanTraceObject; +#[cfg(not(feature = "malloc_mark_sweep"))] +pub type MarkSweepSpace = 
crate::policy::marksweepspace::native_ms::MarkSweepSpace; +#[cfg(not(feature = "malloc_mark_sweep"))] +use crate::policy::marksweepspace::native_ms::MAX_OBJECT_SIZE; #[derive(PlanTraceObject)] pub struct MarkSweep { #[fallback_trace] common: CommonPlan, #[trace] - ms: MallocSpace, + ms: MarkSweepSpace, } pub const MS_CONSTRAINTS: PlanConstraints = PlanConstraints { @@ -39,6 +44,7 @@ pub const MS_CONSTRAINTS: PlanConstraints = PlanConstraints { gc_header_bits: 2, gc_header_words: 0, num_specialized_scans: 1, + max_non_los_default_alloc_bytes: MAX_OBJECT_SIZE, may_trace_duplicate_edges: true, ..PlanConstraints::default() }; @@ -56,7 +62,6 @@ impl Plan for MarkSweep { self.base().set_collection_kind::(self); self.base().set_gc_status(GcStatus::GcPrepare); scheduler.schedule_common_work::>(self); - scheduler.work_buckets[WorkBucketStage::Prepare].add(MSSweepChunks::::new(self)); } fn get_allocator_mapping(&self) -> &'static EnumMap { @@ -65,11 +70,11 @@ impl Plan for MarkSweep { fn prepare(&mut self, tls: VMWorkerThread) { self.common.prepare(tls, true); - // Dont need to prepare for MallocSpace + self.ms.prepare(); } fn release(&mut self, tls: VMWorkerThread) { - trace!("Marksweep: Release"); + self.ms.release(); self.common.release(tls, true); } @@ -95,47 +100,49 @@ impl Plan for MarkSweep { } impl MarkSweep { - pub fn new(vm_map: &'static VMMap, mmapper: &'static Mmapper, options: Arc) -> Self { - let heap = HeapMeta::new(&options); - // if global_alloc_bit is enabled, ALLOC_SIDE_METADATA_SPEC will be added to - // SideMetadataContext by default, so we don't need to add it here. - #[cfg(feature = "global_alloc_bit")] - let global_metadata_specs = - SideMetadataContext::new_global_specs(&[ACTIVE_CHUNK_METADATA_SPEC]); - // if global_alloc_bit is NOT enabled, - // we need to add ALLOC_SIDE_METADATA_SPEC to SideMetadataContext here. - #[cfg(not(feature = "global_alloc_bit"))] - let global_metadata_specs = SideMetadataContext::new_global_specs(&[ - ALLOC_SIDE_METADATA_SPEC, - ACTIVE_CHUNK_METADATA_SPEC, - ]); - - let res = MarkSweep { - ms: MallocSpace::new(global_metadata_specs.clone()), - common: CommonPlan::new( + pub fn new( + vm_map: &'static VMMap, + mmapper: &'static Mmapper, + options: Arc, + scheduler: Arc>, + ) -> Self { + let mut heap = HeapMeta::new(&options); + let mut global_metadata_specs = SideMetadataContext::new_global_specs(&[]); + MarkSweepSpace::::extend_global_side_metadata_specs(&mut global_metadata_specs); + + let res = { + let ms = MarkSweepSpace::new( + "MarkSweepSpace", + false, + VMRequest::discontiguous(), + global_metadata_specs.clone(), + vm_map, + mmapper, + &mut heap, + scheduler, + ); + + let common = CommonPlan::new( vm_map, mmapper, options, heap, &MS_CONSTRAINTS, global_metadata_specs, - ), - }; + ); - // Use SideMetadataSanity to check if each spec is valid. This is also needed for check - // side metadata in extreme_assertions. 
- { - let mut side_metadata_sanity_checker = SideMetadataSanity::new(); - res.common - .verify_side_metadata_sanity(&mut side_metadata_sanity_checker); - res.ms - .verify_side_metadata_sanity(&mut side_metadata_sanity_checker); - } + MarkSweep { common, ms } + }; + let mut side_metadata_sanity_checker = SideMetadataSanity::new(); + res.common + .verify_side_metadata_sanity(&mut side_metadata_sanity_checker); + res.ms + .verify_side_metadata_sanity(&mut side_metadata_sanity_checker); res } - pub fn ms_space(&self) -> &MallocSpace { + pub fn ms_space(&self) -> &MarkSweepSpace { &self.ms } } diff --git a/src/plan/marksweep/mod.rs b/src/plan/marksweep/mod.rs index 634ba126de..82044cdabe 100644 --- a/src/plan/marksweep/mod.rs +++ b/src/plan/marksweep/mod.rs @@ -1,4 +1,4 @@ -//! Plan: marksweep (currently using malloc as its freelist allocator) +//! Plan: marksweep mod gc_work; mod global; diff --git a/src/plan/marksweep/mutator.rs b/src/plan/marksweep/mutator.rs index 1bdc25895d..f79bda0418 100644 --- a/src/plan/marksweep/mutator.rs +++ b/src/plan/marksweep/mutator.rs @@ -1,51 +1,121 @@ -use super::MarkSweep; use crate::plan::barriers::NoBarrier; +use crate::plan::marksweep::MarkSweep; use crate::plan::mutator_context::create_allocator_mapping; -use crate::plan::mutator_context::create_space_mapping; use crate::plan::mutator_context::Mutator; use crate::plan::mutator_context::MutatorConfig; use crate::plan::mutator_context::ReservedAllocators; +use crate::plan::mutator_context::SpaceMapping; use crate::plan::AllocationSemantics; -use crate::util::alloc::allocators::AllocatorSelector; -use crate::util::alloc::allocators::Allocators; +use crate::plan::Plan; +use crate::util::alloc::allocators::{AllocatorSelector, Allocators}; use crate::util::{VMMutatorThread, VMWorkerThread}; use crate::vm::VMBinding; -use crate::Plan; + use enum_map::EnumMap; -pub fn ms_mutator_prepare(_mutator: &mut Mutator, _tls: VMWorkerThread) { - // Do nothing -} +#[cfg(feature = "malloc_mark_sweep")] +mod malloc_mark_sweep { + use super::*; + + // Do nothing for malloc mark sweep (malloc allocator) + + pub fn ms_mutator_prepare(_mutator: &mut Mutator, _tls: VMWorkerThread) {} + pub fn ms_mutator_release(_mutator: &mut Mutator, _tls: VMWorkerThread) {} -pub fn ms_mutator_release(_mutator: &mut Mutator, _tls: VMWorkerThread) { - // Do nothing + // malloc mark sweep uses 1 malloc allocator + + pub(crate) const RESERVED_ALLOCATORS: ReservedAllocators = ReservedAllocators { + n_malloc: 1, + ..ReservedAllocators::DEFAULT + }; + lazy_static! 
{ + pub static ref ALLOCATOR_MAPPING: EnumMap = { + let mut map = create_allocator_mapping(RESERVED_ALLOCATORS, true); + map[AllocationSemantics::Default] = AllocatorSelector::Malloc(0); + map + }; + } + pub(crate) fn create_space_mapping( + plan: &'static dyn Plan, + ) -> Box> { + let ms = plan.downcast_ref::>().unwrap(); + Box::new({ + let mut vec = + crate::plan::mutator_context::create_space_mapping(RESERVED_ALLOCATORS, true, plan); + vec.push((AllocatorSelector::Malloc(0), ms.ms_space())); + vec + }) + } } -const RESERVED_ALLOCATORS: ReservedAllocators = ReservedAllocators { - n_malloc: 1, - ..ReservedAllocators::DEFAULT -}; +#[cfg(not(feature = "malloc_mark_sweep"))] +mod native_mark_sweep { + use super::*; + use crate::util::alloc::FreeListAllocator; + + fn get_freelist_allocator_mut( + mutator: &mut Mutator, + ) -> &mut FreeListAllocator { + unsafe { + mutator + .allocators + .get_allocator_mut(mutator.config.allocator_mapping[AllocationSemantics::Default]) + } + .downcast_mut::>() + .unwrap() + } + + // We forward calls to the allocator prepare and release -lazy_static! { - pub static ref ALLOCATOR_MAPPING: EnumMap = { - let mut map = create_allocator_mapping(RESERVED_ALLOCATORS, true); - map[AllocationSemantics::Default] = AllocatorSelector::Malloc(0); - map + #[cfg(not(feature = "malloc_mark_sweep"))] + pub fn ms_mutator_prepare(mutator: &mut Mutator, _tls: VMWorkerThread) { + get_freelist_allocator_mut::(mutator).prepare(); + } + + #[cfg(not(feature = "malloc_mark_sweep"))] + pub fn ms_mutator_release(mutator: &mut Mutator, _tls: VMWorkerThread) { + get_freelist_allocator_mut::(mutator).release(); + } + + // native mark sweep uses 1 free list allocator + + pub(crate) const RESERVED_ALLOCATORS: ReservedAllocators = ReservedAllocators { + n_free_list: 1, + ..ReservedAllocators::DEFAULT }; + lazy_static! { + pub static ref ALLOCATOR_MAPPING: EnumMap = { + let mut map = create_allocator_mapping(RESERVED_ALLOCATORS, true); + map[AllocationSemantics::Default] = AllocatorSelector::FreeList(0); + map + }; + } + pub(crate) fn create_space_mapping( + plan: &'static dyn Plan, + ) -> Box> { + let ms = plan.downcast_ref::>().unwrap(); + Box::new({ + let mut vec = + crate::plan::mutator_context::create_space_mapping(RESERVED_ALLOCATORS, true, plan); + vec.push((AllocatorSelector::FreeList(0), ms.ms_space())); + vec + }) + } } +#[cfg(feature = "malloc_mark_sweep")] +pub use malloc_mark_sweep::*; + +#[cfg(not(feature = "malloc_mark_sweep"))] +pub use native_mark_sweep::*; + pub fn create_ms_mutator( mutator_tls: VMMutatorThread, plan: &'static dyn Plan, ) -> Mutator { - let ms = plan.downcast_ref::>().unwrap(); let config = MutatorConfig { allocator_mapping: &*ALLOCATOR_MAPPING, - space_mapping: Box::new({ - let mut vec = create_space_mapping(RESERVED_ALLOCATORS, true, plan); - vec.push((AllocatorSelector::Malloc(0), ms.ms_space())); - vec - }), + space_mapping: create_space_mapping(plan), prepare_func: &ms_mutator_prepare, release_func: &ms_mutator_release, }; diff --git a/src/plan/mutator_context.rs b/src/plan/mutator_context.rs index 59dfd9f21d..2731af863e 100644 --- a/src/plan/mutator_context.rs +++ b/src/plan/mutator_context.rs @@ -11,7 +11,7 @@ use crate::vm::VMBinding; use enum_map::EnumMap; -type SpaceMapping = Vec<(AllocatorSelector, &'static dyn Space)>; +pub(crate) type SpaceMapping = Vec<(AllocatorSelector, &'static dyn Space)>; // This struct is part of the Mutator struct. 
// We are trying to make it fixed-sized so that VM bindings can easily define a Mutator type to have the exact same layout as our Mutator struct. @@ -129,6 +129,28 @@ impl MutatorContext for Mutator { } } +impl Mutator { + /// Get all the valid allocator selector (no duplicate) + fn get_all_allocator_selectors(&self) -> Vec { + use itertools::Itertools; + self.config + .allocator_mapping + .iter() + .map(|(_, selector)| *selector) + .sorted() + .dedup() + .filter(|selector| *selector != AllocatorSelector::None) + .collect() + } + + /// Inform each allocator about destroying. Call allocator-specific on destroy methods. + pub fn on_destroy(&mut self) { + for selector in self.get_all_allocator_selectors() { + unsafe { self.allocators.get_allocator_mut(selector) }.on_mutator_destroy(); + } + } +} + /// Each GC plan should provide their implementation of a MutatorContext. *Note that this trait is no longer needed as we removed /// per-plan mutator implementation and we will remove this trait as well in the future.* @@ -175,6 +197,7 @@ pub(crate) struct ReservedAllocators { pub n_malloc: u8, pub n_immix: u8, pub n_mark_compact: u8, + pub n_free_list: u8, } impl ReservedAllocators { @@ -184,6 +207,7 @@ impl ReservedAllocators { n_malloc: 0, n_immix: 0, n_mark_compact: 0, + n_free_list: 0, }; /// check if the number of each allocator is okay. Panics if any allocator exceeds the max number. fn validate(&self) { @@ -208,6 +232,10 @@ impl ReservedAllocators { self.n_mark_compact as usize <= MAX_MARK_COMPACT_ALLOCATORS, "Allocator mapping declared more mark compact allocators than the max allowed." ); + assert!( + self.n_free_list as usize <= MAX_FREE_LIST_ALLOCATORS, + "Allocator mapping declared more free list allocators than the max allowed." + ); } } diff --git a/src/policy/immix/block.rs b/src/policy/immix/block.rs index 73c14090d6..4a6234c9af 100644 --- a/src/policy/immix/block.rs +++ b/src/policy/immix/block.rs @@ -1,9 +1,9 @@ -use super::chunk::Chunk; use super::defrag::Histogram; use super::line::Line; use super::ImmixSpace; use crate::util::constants::*; use crate::util::heap::blockpageresource::BlockPool; +use crate::util::heap::chunk_map::Chunk; use crate::util::linear_scan::{Region, RegionIterator}; use crate::util::metadata::side_metadata::{MetadataByteArrayRef, SideMetadataSpec}; use crate::util::Address; diff --git a/src/policy/immix/chunk.rs b/src/policy/immix/chunk.rs deleted file mode 100644 index 9c659efd3c..0000000000 --- a/src/policy/immix/chunk.rs +++ /dev/null @@ -1,222 +0,0 @@ -use super::block::{Block, BlockState}; -use super::defrag::Histogram; -use super::immixspace::ImmixSpace; -use crate::util::linear_scan::{Region, RegionIterator}; -use crate::util::metadata::side_metadata::SideMetadataSpec; -use crate::{ - scheduler::*, - util::{heap::layout::vm_layout_constants::LOG_BYTES_IN_CHUNK, Address}, - vm::*, - MMTK, -}; -use spin::Mutex; -use std::sync::atomic::AtomicUsize; -use std::sync::Arc; -use std::{ops::Range, sync::atomic::Ordering}; - -/// Data structure to reference a MMTk 4 MB chunk. 
-#[repr(transparent)] -#[derive(Debug, Clone, Copy, PartialOrd, PartialEq, Eq)] -pub struct Chunk(Address); - -impl Region for Chunk { - const LOG_BYTES: usize = LOG_BYTES_IN_CHUNK; - - #[inline(always)] - fn from_aligned_address(address: Address) -> Self { - debug_assert!(address.is_aligned_to(Self::BYTES)); - Self(address) - } - - #[inline(always)] - fn start(&self) -> Address { - self.0 - } -} - -impl Chunk { - /// Chunk constant with zero address - const ZERO: Self = Self(Address::ZERO); - /// Log blocks in chunk - pub const LOG_BLOCKS: usize = Self::LOG_BYTES - Block::LOG_BYTES; - /// Blocks in chunk - pub const BLOCKS: usize = 1 << Self::LOG_BLOCKS; - - /// Get a range of blocks within this chunk. - #[inline(always)] - pub fn blocks(&self) -> RegionIterator { - let start = Block::from_unaligned_address(self.0); - let end = Block::from_aligned_address(start.start() + (Self::BLOCKS << Block::LOG_BYTES)); - RegionIterator::::new(start, end) - } - - /// Sweep this chunk. - pub fn sweep(&self, space: &ImmixSpace, mark_histogram: &mut Histogram) { - let line_mark_state = if super::BLOCK_ONLY { - None - } else { - Some(space.line_mark_state.load(Ordering::Acquire)) - }; - // number of allocated blocks. - let mut allocated_blocks = 0; - // Iterate over all allocated blocks in this chunk. - for block in self - .blocks() - .filter(|block| block.get_state() != BlockState::Unallocated) - { - if !block.sweep(space, mark_histogram, line_mark_state) { - // Block is live. Increment the allocated block count. - allocated_blocks += 1; - } - } - // Set this chunk as free if there is not live blocks. - if allocated_blocks == 0 { - space.chunk_map.set(*self, ChunkState::Free) - } - } -} - -/// Chunk allocation state -#[repr(u8)] -#[derive(Debug, PartialEq, Clone, Copy)] -pub enum ChunkState { - /// The chunk is not allocated. - Free = 0, - /// The chunk is allocated. - Allocated = 1, -} - -/// A byte-map to record all the allocated chunks -pub struct ChunkMap { - chunk_range: Mutex>, -} - -impl ChunkMap { - /// Chunk alloc table - pub const ALLOC_TABLE: SideMetadataSpec = - crate::util::metadata::side_metadata::spec_defs::IX_CHUNK_MARK; - - pub fn new() -> Self { - Self { - chunk_range: Mutex::new(Chunk::ZERO..Chunk::ZERO), - } - } - - /// Set chunk state - pub fn set(&self, chunk: Chunk, state: ChunkState) { - // Do nothing if the chunk is already in the expected state. - if self.get(chunk) == state { - return; - } - // Update alloc byte - unsafe { Self::ALLOC_TABLE.store::(chunk.start(), state as u8) }; - // If this is a newly allcoated chunk, then expand the chunk range. - if state == ChunkState::Allocated { - debug_assert!(!chunk.start().is_zero()); - let mut range = self.chunk_range.lock(); - if range.start == Chunk::ZERO { - range.start = chunk; - range.end = chunk.next(); - } else if chunk < range.start { - range.start = chunk; - } else if range.end <= chunk { - range.end = chunk.next(); - } - } - } - - /// Get chunk state - pub fn get(&self, chunk: Chunk) -> ChunkState { - let byte = unsafe { Self::ALLOC_TABLE.load::(chunk.start()) }; - match byte { - 0 => ChunkState::Free, - 1 => ChunkState::Allocated, - _ => unreachable!(), - } - } - - /// A range of all chunks in the heap. - pub fn all_chunks(&self) -> RegionIterator { - let chunk_range = self.chunk_range.lock(); - RegionIterator::::new(chunk_range.start, chunk_range.end) - } - - /// Helper function to create per-chunk processing work packets. 
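// Hedged illustration of the ChunkMap API being generalised by this refactoring: the struct is
// deleted from the Immix policy in this hunk and, judging by the new imports in block.rs and
// immixspace.rs, lives on under util::heap::chunk_map so other spaces can reuse it. A space
// records its allocated chunks and can turn them into per-chunk work packets. `MyChunkWork`,
// `chunk_map`, `chunk_start` and `mmtk` below are hypothetical, not from this patch.
// chunk_map.set(Chunk::from_aligned_address(chunk_start), ChunkState::Allocated);
// let packets = chunk_map.generate_tasks(|chunk| Box::new(MyChunkWork { chunk }));
// mmtk.scheduler.work_buckets[WorkBucketStage::Release].bulk_add(packets);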
- pub fn generate_tasks( - &self, - func: impl Fn(Chunk) -> Box>, - ) -> Vec>> { - let mut work_packets: Vec>> = vec![]; - for chunk in self - .all_chunks() - .filter(|c| self.get(*c) == ChunkState::Allocated) - { - work_packets.push(func(chunk)); - } - work_packets - } - - /// Generate chunk sweep work packets. - pub fn generate_sweep_tasks( - &self, - space: &'static ImmixSpace, - ) -> Vec>> { - space.defrag.mark_histograms.lock().clear(); - let epilogue = Arc::new(FlushPageResource { - space, - counter: AtomicUsize::new(0), - }); - let tasks = self.generate_tasks(|chunk| { - Box::new(SweepChunk { - space, - chunk, - epilogue: epilogue.clone(), - }) - }); - epilogue.counter.store(tasks.len(), Ordering::SeqCst); - tasks - } -} - -impl Default for ChunkMap { - fn default() -> Self { - Self::new() - } -} - -/// Chunk sweeping work packet. -struct SweepChunk { - space: &'static ImmixSpace, - chunk: Chunk, - /// A destructor invoked when all `SweepChunk` packets are finished. - epilogue: Arc>, -} - -impl GCWork for SweepChunk { - #[inline] - fn do_work(&mut self, _worker: &mut GCWorker, _mmtk: &'static MMTK) { - let mut histogram = self.space.defrag.new_histogram(); - if self.space.chunk_map.get(self.chunk) == ChunkState::Allocated { - self.chunk.sweep(self.space, &mut histogram); - } - self.space.defrag.add_completed_mark_histogram(histogram); - self.epilogue.finish_one_work_packet(); - } -} - -/// Count number of remaining work pacets, and flush page resource if all packets are finished. -struct FlushPageResource { - space: &'static ImmixSpace, - counter: AtomicUsize, -} - -impl FlushPageResource { - /// Called after a related work packet is finished. - fn finish_one_work_packet(&self) { - if 1 == self.counter.fetch_sub(1, Ordering::SeqCst) { - // We've finished releasing all the dead blocks to the BlockPageResource's thread-local queues. - // Now flush the BlockPageResource. - self.space.flush_page_resource() - } - } -} diff --git a/src/policy/immix/immixspace.rs b/src/policy/immix/immixspace.rs index f5bc5bc345..9cb1f20af0 100644 --- a/src/policy/immix/immixspace.rs +++ b/src/policy/immix/immixspace.rs @@ -1,15 +1,12 @@ use super::line::*; -use super::{ - block::*, - chunk::{Chunk, ChunkMap, ChunkState}, - defrag::Defrag, -}; +use super::{block::*, defrag::Defrag}; use crate::policy::gc_work::TraceKind; use crate::policy::sft::GCWorkerMutRef; use crate::policy::sft::SFT; use crate::policy::space::SpaceOptions; use crate::policy::space::{CommonSpace, Space}; use crate::util::copy::*; +use crate::util::heap::chunk_map::*; use crate::util::heap::layout::heap_layout::{Mmapper, VMMap}; use crate::util::heap::BlockPageResource; use crate::util::heap::HeapMeta; @@ -28,7 +25,7 @@ use crate::{ MMTK, }; use atomic::Ordering; -use std::sync::{atomic::AtomicU8, Arc}; +use std::sync::{atomic::AtomicU8, atomic::AtomicUsize, Arc}; pub(crate) const TRACE_KIND_FAST: TraceKind = 0; pub(crate) const TRACE_KIND_DEFRAG: TraceKind = 1; @@ -341,9 +338,7 @@ impl ImmixSpace { self.reusable_blocks.reset(); } // Sweep chunks and blocks - // # Safety: ImmixSpace reference is always valid within this collection cycle. 
- let space = unsafe { &*(self as *const Self) }; - let work_packets = self.chunk_map.generate_sweep_tasks(space); + let work_packets = self.generate_sweep_tasks(); self.scheduler().work_buckets[WorkBucketStage::Release].bulk_add(work_packets); if super::DEFRAG { self.defrag.release(self); @@ -351,6 +346,26 @@ impl ImmixSpace { did_defrag } + /// Generate chunk sweep tasks + fn generate_sweep_tasks(&self) -> Vec>> { + self.defrag.mark_histograms.lock().clear(); + // # Safety: ImmixSpace reference is always valid within this collection cycle. + let space = unsafe { &*(self as *const Self) }; + let epilogue = Arc::new(FlushPageResource { + space, + counter: AtomicUsize::new(0), + }); + let tasks = self.chunk_map.generate_tasks(|chunk| { + Box::new(SweepChunk { + space, + chunk, + epilogue: epilogue.clone(), + }) + }); + epilogue.counter.store(tasks.len(), Ordering::SeqCst); + tasks + } + /// Release a block. pub fn release_block(&self, block: Block) { block.deinit(); @@ -649,7 +664,7 @@ impl GCWork for PrepareBlockState { // Clear object mark table for this chunk Self::reset_object_mark(self.chunk); // Iterate over all blocks in this chunk - for block in self.chunk.blocks() { + for block in self.chunk.iter_region::() { let state = block.get_state(); // Skip unallocated blocks. if state == BlockState::Unallocated { @@ -669,6 +684,64 @@ impl GCWork for PrepareBlockState { } } +/// Chunk sweeping work packet. +struct SweepChunk { + space: &'static ImmixSpace, + chunk: Chunk, + /// A destructor invoked when all `SweepChunk` packets are finished. + epilogue: Arc>, +} + +impl GCWork for SweepChunk { + #[inline] + fn do_work(&mut self, _worker: &mut GCWorker, _mmtk: &'static MMTK) { + let mut histogram = self.space.defrag.new_histogram(); + if self.space.chunk_map.get(self.chunk) == ChunkState::Allocated { + let line_mark_state = if super::BLOCK_ONLY { + None + } else { + Some(self.space.line_mark_state.load(Ordering::Acquire)) + }; + // number of allocated blocks. + let mut allocated_blocks = 0; + // Iterate over all allocated blocks in this chunk. + for block in self + .chunk + .iter_region::() + .filter(|block| block.get_state() != BlockState::Unallocated) + { + if !block.sweep(self.space, &mut histogram, line_mark_state) { + // Block is live. Increment the allocated block count. + allocated_blocks += 1; + } + } + // Set this chunk as free if there is not live blocks. + if allocated_blocks == 0 { + self.space.chunk_map.set(self.chunk, ChunkState::Free) + } + } + self.space.defrag.add_completed_mark_histogram(histogram); + self.epilogue.finish_one_work_packet(); + } +} + +/// Count number of remaining work pacets, and flush page resource if all packets are finished. +struct FlushPageResource { + space: &'static ImmixSpace, + counter: AtomicUsize, +} + +impl FlushPageResource { + /// Called after a related work packet is finished. + fn finish_one_work_packet(&self) { + if 1 == self.counter.fetch_sub(1, Ordering::SeqCst) { + // We've finished releasing all the dead blocks to the BlockPageResource's thread-local queues. + // Now flush the BlockPageResource. 
+ self.space.flush_page_resource() + } + } +} + use crate::plan::{Plan, VectorObjectQueue}; use crate::policy::copy_context::PolicyCopyContext; use crate::util::alloc::Allocator; diff --git a/src/policy/immix/line.rs b/src/policy/immix/line.rs index 19e9f930b9..511ae1c4fb 100644 --- a/src/policy/immix/line.rs +++ b/src/policy/immix/line.rs @@ -71,7 +71,7 @@ impl Line { #[inline] pub fn mark_lines_for_object(object: ObjectReference, state: u8) -> usize { debug_assert!(!super::BLOCK_ONLY); - let start = VM::VMObjectModel::ref_to_object_start(object); + let start = object.to_object_start::(); let end = start + VM::VMObjectModel::get_current_size(object); let start_line = Line::from_unaligned_address(start); let mut end_line = Line::from_unaligned_address(end); diff --git a/src/policy/immix/mod.rs b/src/policy/immix/mod.rs index a158663f1b..490018a215 100644 --- a/src/policy/immix/mod.rs +++ b/src/policy/immix/mod.rs @@ -1,5 +1,4 @@ pub mod block; -pub mod chunk; pub mod defrag; pub mod immixspace; pub mod line; diff --git a/src/policy/largeobjectspace.rs b/src/policy/largeobjectspace.rs index 7b64d89b36..14d802570b 100644 --- a/src/policy/largeobjectspace.rs +++ b/src/policy/largeobjectspace.rs @@ -235,9 +235,7 @@ impl LargeObjectSpace { #[cfg(feature = "global_alloc_bit")] crate::util::alloc_bit::unset_alloc_bit::(object); self.pr - .release_pages(get_super_page(VM::VMObjectModel::ref_to_object_start( - object, - ))); + .release_pages(get_super_page(object.to_object_start::())); }; if sweep_nursery { for object in self.treadmill.collect_nursery() { diff --git a/src/policy/mallocspace/mod.rs b/src/policy/mallocspace/mod.rs deleted file mode 100644 index 07dec884ac..0000000000 --- a/src/policy/mallocspace/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -///! A marksweep space that allocates from malloc. 
-mod global; -pub mod metadata; - -pub use global::*; diff --git a/src/policy/markcompactspace.rs b/src/policy/markcompactspace.rs index 89a5eb6df1..e42bd56625 100644 --- a/src/policy/markcompactspace.rs +++ b/src/policy/markcompactspace.rs @@ -159,7 +159,7 @@ impl MarkCompactSpace { /// Get the address for header forwarding pointer #[inline(always)] fn header_forwarding_pointer_address(object: ObjectReference) -> Address { - VM::VMObjectModel::ref_to_object_start(object) - GC_EXTRA_HEADER_BYTES + object.to_object_start::() - GC_EXTRA_HEADER_BYTES } /// Get header forwarding pointer for an object @@ -398,7 +398,7 @@ impl MarkCompactSpace { let end_of_new_object = VM::VMObjectModel::copy_to(obj, new_object, Address::ZERO); // update alloc_bit, alloc_bit::set_alloc_bit::(new_object); - to = VM::VMObjectModel::ref_to_object_start(new_object) + copied_size; + to = new_object.to_object_start::() + copied_size; debug_assert_eq!(end_of_new_object, to); } } diff --git a/src/policy/mallocspace/global.rs b/src/policy/marksweepspace/malloc_ms/global.rs similarity index 87% rename from src/policy/mallocspace/global.rs rename to src/policy/marksweepspace/malloc_ms/global.rs index e914fd7c48..f43ec4ccc7 100644 --- a/src/policy/mallocspace/global.rs +++ b/src/policy/marksweepspace/malloc_ms/global.rs @@ -4,7 +4,12 @@ use crate::plan::VectorObjectQueue; use crate::policy::sft::GCWorkerMutRef; use crate::policy::sft::SFT; use crate::policy::space::CommonSpace; +use crate::scheduler::GCWorkScheduler; +use crate::util::heap::layout::heap_layout::Mmapper; +use crate::util::heap::layout::heap_layout::VMMap; +use crate::util::heap::HeapMeta; use crate::util::heap::PageResource; +use crate::util::heap::VMRequest; use crate::util::malloc::library::{BYTES_IN_MALLOC_PAGE, LOG_BYTES_IN_MALLOC_PAGE}; use crate::util::malloc::malloc_ms_util::*; use crate::util::metadata::side_metadata::{ @@ -24,14 +29,15 @@ use std::marker::PhantomData; #[cfg(debug_assertions)] use std::sync::atomic::AtomicU32; use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; #[cfg(debug_assertions)] use std::sync::Mutex; - // If true, we will use a hashmap to store all the allocated memory from malloc, and use it // to make sure our allocation is correct. #[cfg(debug_assertions)] const ASSERT_ALLOCATION: bool = false; +/// This space uses malloc to get new memory, and performs mark-sweep for the memory. pub struct MallocSpace { phantom: PhantomData, active_bytes: AtomicUsize, @@ -39,6 +45,8 @@ pub struct MallocSpace { pub chunk_addr_min: AtomicUsize, // XXX: have to use AtomicUsize to represent an Address pub chunk_addr_max: AtomicUsize, metadata: SideMetadataContext, + /// Work packet scheduler + scheduler: Arc>, // Mapping between allocated address and its size - this is used to check correctness. // Size will be set to zero when the memory is freed. #[cfg(debug_assertions)] @@ -135,7 +143,7 @@ impl Space for MallocSpace { #[cfg(debug_assertions)] if ASSERT_ALLOCATION { - let addr = VM::VMObjectModel::ref_to_object_start(object); + let addr = object.to_object_start::(); let active_mem = self.active_mem.lock().unwrap(); if ret { // The alloc bit tells that the object is in space. @@ -211,8 +219,35 @@ impl crate::policy::gc_work::PolicyTraceObject for MallocSpac } } +// Actually no max object size. 
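// Hedged sketch of what `max_non_los_default_alloc_bytes` (set from MAX_OBJECT_SIZE in
// MS_CONSTRAINTS earlier in this patch) is meant for: requests larger than this cannot be
// served by the default mark-sweep allocator and are expected to go to the large object space.
// Hypothetical call site, not from this patch:
// let semantics = if size > MS_CONSTRAINTS.max_non_los_default_alloc_bytes {
//     AllocationSemantics::Los
// } else {
//     AllocationSemantics::Default
// };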
+#[allow(dead_code)] +pub const MAX_OBJECT_SIZE: usize = crate::util::constants::MAX_INT; + impl MallocSpace { - pub fn new(global_side_metadata_specs: Vec) -> Self { + pub fn extend_global_side_metadata_specs(specs: &mut Vec) { + // MallocSpace needs to use alloc bit. If the feature is turned on, the alloc bit spec is in the global specs. + // Otherwise, we manually add it. + if !cfg!(feature = "global_alloc_bit") { + specs.push(crate::util::alloc_bit::ALLOC_SIDE_METADATA_SPEC); + } + // MallocSpace also need a global chunk metadata. + // TODO: I don't know why this is a global spec. Can we replace it with the chunk map (and the local spec used in the chunk map)? + // One reason could be that the address range in this space is not in our control, and it could be anywhere in the heap, thus we have + // to make it a global spec. I am not too sure about this. + specs.push(ACTIVE_CHUNK_METADATA_SPEC); + } + + #[allow(clippy::too_many_arguments)] + pub fn new( + _name: &'static str, + _zeroed: bool, + _vmrequest: VMRequest, + global_side_metadata_specs: Vec, + _vm_map: &'static VMMap, + _mmapper: &'static Mmapper, + _heap: &mut HeapMeta, + scheduler: Arc>, + ) -> Self { MallocSpace { phantom: PhantomData, active_bytes: AtomicUsize::new(0), @@ -227,6 +262,7 @@ impl MallocSpace { *VM::VMObjectModel::LOCAL_MARK_BIT_SPEC, ]), }, + scheduler, #[cfg(debug_assertions)] active_mem: Mutex::new(HashMap::new()), #[cfg(debug_assertions)] @@ -371,8 +407,7 @@ impl MallocSpace { ); if !is_marked::(object, Ordering::Relaxed) { - let chunk_start = - conversions::chunk_align_down(VM::VMObjectModel::ref_to_object_start(object)); + let chunk_start = conversions::chunk_align_down(object.to_object_start::()); set_mark_bit::(object, Ordering::SeqCst); set_chunk_mark(chunk_start); queue.enqueue(object); @@ -425,6 +460,39 @@ impl MallocSpace { } } + pub fn prepare(&mut self) {} + + pub fn release(&mut self) { + use crate::scheduler::WorkBucketStage; + let mut work_packets: Vec>> = vec![]; + let mut chunk = unsafe { Address::from_usize(self.chunk_addr_min.load(Ordering::Relaxed)) }; // XXX: have to use AtomicUsize to represent an Address + let end = unsafe { Address::from_usize(self.chunk_addr_max.load(Ordering::Relaxed)) } + + BYTES_IN_CHUNK; + + // Since only a single thread generates the sweep work packets as well as it is a Stop-the-World collector, + // we can assume that the chunk mark metadata is not being accessed by anything else and hence we use + // non-atomic accesses + let space = unsafe { &*(self as *const Self) }; + while chunk < end { + if is_chunk_mapped(chunk) && unsafe { is_chunk_marked_unsafe(chunk) } { + work_packets.push(Box::new(MSSweepChunk { ms: space, chunk })); + } + + chunk += BYTES_IN_CHUNK; + } + + debug!("Generated {} sweep work packets", work_packets.len()); + #[cfg(debug_assertions)] + { + self.total_work_packets + .store(work_packets.len() as u32, Ordering::SeqCst); + self.completed_work_packets.store(0, Ordering::SeqCst); + self.work_live_bytes.store(0, Ordering::SeqCst); + } + + self.scheduler.work_buckets[WorkBucketStage::Release].bulk_add(work_packets); + } + pub fn sweep_chunk(&self, chunk_start: Address) { // Call the relevant sweep function depending on the location of the mark bits match *VM::VMObjectModel::LOCAL_MARK_BIT_SPEC { @@ -440,7 +508,7 @@ impl MallocSpace { /// Given an object in MallocSpace, return its malloc address, whether it is an offset malloc, and malloc size #[inline(always)] fn get_malloc_addr_size(object: ObjectReference) -> (Address, bool, usize) { - let 
obj_start = VM::VMObjectModel::ref_to_object_start(object); + let obj_start = object.to_object_start::(); let offset_malloc_bit = is_offset_malloc(obj_start); let bytes = get_malloc_usable_size(obj_start, offset_malloc_bit); (obj_start, offset_malloc_bit, bytes) @@ -479,8 +547,9 @@ impl MallocSpace { // Unset marks for free pages and update last_object_end if !empty_page_start.is_zero() { // unset marks for pages since last object - let current_page = - VM::VMObjectModel::ref_to_object_start(object).align_down(BYTES_IN_MALLOC_PAGE); + let current_page = object + .to_object_start::() + .align_down(BYTES_IN_MALLOC_PAGE); if current_page > *empty_page_start { // we are the only GC thread that is accessing this chunk unsafe { @@ -729,3 +798,20 @@ impl crate::util::linear_scan::LinearScanObjectSize for MallocObj bytes } } + +use crate::scheduler::GCWork; +use crate::MMTK; + +/// Simple work packet that just sweeps a single chunk +pub struct MSSweepChunk { + ms: &'static MallocSpace, + // starting address of a chunk + chunk: Address, +} + +impl GCWork for MSSweepChunk { + #[inline] + fn do_work(&mut self, _worker: &mut GCWorker, _mmtk: &'static MMTK) { + self.ms.sweep_chunk(self.chunk); + } +} diff --git a/src/policy/mallocspace/metadata.rs b/src/policy/marksweepspace/malloc_ms/metadata.rs similarity index 100% rename from src/policy/mallocspace/metadata.rs rename to src/policy/marksweepspace/malloc_ms/metadata.rs diff --git a/src/policy/marksweepspace/malloc_ms/mod.rs b/src/policy/marksweepspace/malloc_ms/mod.rs new file mode 100644 index 0000000000..623c3cc234 --- /dev/null +++ b/src/policy/marksweepspace/malloc_ms/mod.rs @@ -0,0 +1,5 @@ +mod global; +mod metadata; + +pub use global::*; +pub use metadata::*; diff --git a/src/policy/marksweepspace/mod.rs b/src/policy/marksweepspace/mod.rs new file mode 100644 index 0000000000..e9688d896b --- /dev/null +++ b/src/policy/marksweepspace/mod.rs @@ -0,0 +1,15 @@ +//! Mark sweep space. +//! MMTk provides two implementations of mark sweep: +//! 1. mark sweep using a native freelist allocator implemented in MMTk. This is the default mark sweep implementation, and +//! most people should use this. +//! 2. mark sweep using malloc as its freelist allocator. Use the feature `malloc_mark_sweep` to enable it. As we do not control +//! the allocation of malloc, we have to work around a few issues to make it for mark sweep. Thus it has considerably worse performance. +//! This is an experimental feature, and should only be used if you are actually interested in using malloc as the allocator. +//! Otherwise this should not be used. + +// TODO: we should extract the code about mark sweep, and make both implementation use the same mark sweep code. + +/// Malloc mark sweep. This uses `MallocSpace` and `MallocAllocator`. +pub(crate) mod malloc_ms; +/// Native mark sweep. This uses `MarkSweepSpace` and `FreeListAllocator`. 
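// Hedged sketch of how the two choices compose (hypothetical binding code; the builder/option
// API names are assumptions, not taken from this patch): the plan is selected at run time
// through the options system (cf. MMTK_PLAN=MarkSweep in ci-test.sh above), while native vs.
// malloc mark sweep is fixed at build time by the `malloc_mark_sweep` feature described here.
// let mut builder = MMTKBuilder::new();
// builder.set_option("plan", "MarkSweep");
// let mmtk = memory_manager::mmtk_init::<MyVM>(&builder);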
+pub(crate) mod native_ms; diff --git a/src/policy/marksweepspace/native_ms/block.rs b/src/policy/marksweepspace/native_ms/block.rs new file mode 100644 index 0000000000..200bc7eb66 --- /dev/null +++ b/src/policy/marksweepspace/native_ms/block.rs @@ -0,0 +1,447 @@ +// adapted from Immix + +use atomic::Ordering; + +use super::BlockList; +use super::MarkSweepSpace; +use crate::util::heap::chunk_map::*; +use crate::util::linear_scan::Region; +use crate::vm::ObjectModel; +use crate::{ + util::{ + metadata::side_metadata::SideMetadataSpec, Address, ObjectReference, OpaquePointer, + VMThread, + }, + vm::VMBinding, +}; + +use std::num::NonZeroUsize; + +/// A 64KB region for MiMalloc. +/// This is also known as MiMalloc page. We try to avoid getting confused with the OS 4K page. So we call it block. +/// This type always holds a non-zero address to refer to a block. The underlying `NonZeroUsize` type ensures the +/// size of `Option` is the same as `Block` itself. +// TODO: If we actually use the first block, we would need to turn the type into `Block(Address)`, and use `None` and +// `Block(Address::ZERO)` to differentiate those. +#[derive(Debug, Clone, Copy, PartialOrd, PartialEq)] +#[repr(transparent)] +pub struct Block(NonZeroUsize); + +impl Region for Block { + const LOG_BYTES: usize = 16; + + #[inline(always)] + fn from_aligned_address(address: Address) -> Self { + debug_assert!(address.is_aligned_to(Self::BYTES)); + debug_assert!(!address.is_zero()); + Self(unsafe { NonZeroUsize::new_unchecked(address.as_usize()) }) + } + + #[inline(always)] + fn start(&self) -> Address { + unsafe { Address::from_usize(self.0.get()) } + } +} + +impl Block { + pub const METADATA_SPECS: [SideMetadataSpec; 7] = [ + Self::MARK_TABLE, + Self::NEXT_BLOCK_TABLE, + Self::PREV_BLOCK_TABLE, + Self::FREE_LIST_TABLE, + Self::SIZE_TABLE, + Self::BLOCK_LIST_TABLE, + Self::TLS_TABLE, + ]; + + /// Block mark table (side) + pub const MARK_TABLE: SideMetadataSpec = + crate::util::metadata::side_metadata::spec_defs::MS_BLOCK_MARK; + + pub const NEXT_BLOCK_TABLE: SideMetadataSpec = + crate::util::metadata::side_metadata::spec_defs::MS_BLOCK_NEXT; + + pub const PREV_BLOCK_TABLE: SideMetadataSpec = + crate::util::metadata::side_metadata::spec_defs::MS_BLOCK_PREV; + + pub const FREE_LIST_TABLE: SideMetadataSpec = + crate::util::metadata::side_metadata::spec_defs::MS_FREE; + + // needed for non GC context + #[cfg(feature = "malloc_native_mimalloc")] + pub const LOCAL_FREE_LIST_TABLE: SideMetadataSpec = + crate::util::metadata::side_metadata::spec_defs::MS_LOCAL_FREE; + + #[cfg(feature = "malloc_native_mimalloc")] + pub const THREAD_FREE_LIST_TABLE: SideMetadataSpec = + crate::util::metadata::side_metadata::spec_defs::MS_THREAD_FREE; + + pub const SIZE_TABLE: SideMetadataSpec = + crate::util::metadata::side_metadata::spec_defs::MS_BLOCK_SIZE; + + pub const BLOCK_LIST_TABLE: SideMetadataSpec = + crate::util::metadata::side_metadata::spec_defs::MS_BLOCK_LIST; + + pub const TLS_TABLE: SideMetadataSpec = + crate::util::metadata::side_metadata::spec_defs::MS_BLOCK_TLS; + + #[inline] + pub fn load_free_list(&self) -> Address { + unsafe { Address::from_usize(Block::FREE_LIST_TABLE.load::(self.start())) } + } + + #[inline] + pub fn store_free_list(&self, free_list: Address) { + unsafe { Block::FREE_LIST_TABLE.store::(self.start(), free_list.as_usize()) } + } + + #[cfg(feature = "malloc_native_mimalloc")] + #[inline] + pub fn load_local_free_list(&self) -> Address { + unsafe { 
Address::from_usize(Block::LOCAL_FREE_LIST_TABLE.load::(self.start())) } + } + + #[cfg(feature = "malloc_native_mimalloc")] + #[inline] + pub fn store_local_free_list(&self, local_free: Address) { + unsafe { Block::LOCAL_FREE_LIST_TABLE.store::(self.start(), local_free.as_usize()) } + } + + #[cfg(feature = "malloc_native_mimalloc")] + #[inline] + pub fn load_thread_free_list(&self) -> Address { + unsafe { + Address::from_usize( + Block::THREAD_FREE_LIST_TABLE.load_atomic::(self.start(), Ordering::SeqCst), + ) + } + } + + #[cfg(feature = "malloc_native_mimalloc")] + #[inline] + pub fn store_thread_free_list(&self, thread_free: Address) { + unsafe { + Block::THREAD_FREE_LIST_TABLE.store::(self.start(), thread_free.as_usize()) + } + } + + #[cfg(feature = "malloc_native_mimalloc")] + #[inline] + pub fn cas_thread_free_list(&self, old_thread_free: Address, new_thread_free: Address) -> bool { + Block::THREAD_FREE_LIST_TABLE + .compare_exchange_atomic::( + self.start(), + old_thread_free.as_usize(), + new_thread_free.as_usize(), + Ordering::SeqCst, + Ordering::SeqCst, + ) + .is_ok() + } + + pub fn load_prev_block(&self) -> Option { + let prev = unsafe { Block::PREV_BLOCK_TABLE.load::(self.start()) }; + NonZeroUsize::new(prev).map(Block) + } + + pub fn load_next_block(&self) -> Option { + let next = unsafe { Block::NEXT_BLOCK_TABLE.load::(self.start()) }; + NonZeroUsize::new(next).map(Block) + } + + pub fn store_next_block(&self, next: Block) { + unsafe { + Block::NEXT_BLOCK_TABLE.store::(self.start(), next.start().as_usize()); + } + } + + pub fn clear_next_block(&self) { + unsafe { + Block::NEXT_BLOCK_TABLE.store::(self.start(), 0); + } + } + + pub fn store_prev_block(&self, prev: Block) { + unsafe { + Block::PREV_BLOCK_TABLE.store::(self.start(), prev.start().as_usize()); + } + } + + pub fn clear_prev_block(&self) { + unsafe { + Block::PREV_BLOCK_TABLE.store::(self.start(), 0); + } + } + + pub fn store_block_list(&self, block_list: &BlockList) { + let block_list_usize: usize = + unsafe { std::mem::transmute::<&BlockList, usize>(block_list) }; + unsafe { + Block::BLOCK_LIST_TABLE.store::(self.start(), block_list_usize); + } + } + + pub fn load_block_list(&self) -> *mut BlockList { + let block_list = + Block::BLOCK_LIST_TABLE.load_atomic::(self.start(), Ordering::SeqCst); + unsafe { std::mem::transmute::(block_list) } + } + + pub fn load_block_cell_size(&self) -> usize { + Block::SIZE_TABLE.load_atomic::(self.start(), Ordering::SeqCst) + } + + pub fn store_block_cell_size(&self, size: usize) { + unsafe { Block::SIZE_TABLE.store::(self.start(), size) } + } + + pub fn store_tls(&self, tls: VMThread) { + let tls = unsafe { std::mem::transmute::(tls.0) }; + unsafe { Block::TLS_TABLE.store(self.start(), tls) } + } + + pub fn load_tls(&self) -> VMThread { + let tls = Block::TLS_TABLE.load_atomic::(self.start(), Ordering::SeqCst); + VMThread(OpaquePointer::from_address(unsafe { + Address::from_usize(tls) + })) + } + + pub fn has_free_cells(&self) -> bool { + !self.load_free_list().is_zero() + } + + /// Get block mark state. + #[inline(always)] + pub fn get_state(&self) -> BlockState { + let byte = Self::MARK_TABLE.load_atomic::(self.start(), Ordering::SeqCst); + byte.into() + } + + /// Set block mark state. + #[inline(always)] + pub fn set_state(&self, state: BlockState) { + let state = u8::from(state); + Self::MARK_TABLE.store_atomic::(self.start(), state, Ordering::SeqCst); + } + + /// Release this block if it is unmarked. Return true if the block is release. 
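    // Hedged sketch (illustrative only, not part of this patch) of how the per-block free list
    // stored above is consumed: each free cell holds the address of the next free cell in its
    // first word, so handing out a cell is a plain linked-list pop. The real allocation
    // fastpath lives in `FreeListAllocator`, not in this file.
    pub fn pop_free_cell_sketch(&self) -> Option<Address> {
        let cell = self.load_free_list();
        if cell.is_zero() {
            return None;
        }
        // The first word of a free cell links to the next free cell; the sweep methods below
        // (`simple_sweep`/`naive_brute_force_sweep`) are what build this chain.
        let next = unsafe { cell.load::<Address>() };
        self.store_free_list(next);
        Some(cell)
    }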
+ #[inline(always)] + pub fn attempt_release(self, space: &MarkSweepSpace) -> bool { + match self.get_state() { + BlockState::Unallocated => false, + BlockState::Unmarked => { + unsafe { + let block_list = loop { + let list = self.load_block_list(); + (*list).lock(); + if list == self.load_block_list() { + break list; + } + (*list).unlock(); + }; + (*block_list).remove(self); + (*block_list).unlock(); + } + space.release_block(self); + true + } + BlockState::Marked => { + // The block is live. + false + } + } + } + + /// Sweep the block. This is done either lazily in the allocation phase, or eagerly at the end of a GC. + pub fn sweep(&self) { + // The important point here is that we need to distinguish cell address, allocation address, and object reference. + // We only know cell addresses here. We do not know the allocation address, and we also do not know the object reference. + // The mark bit is set for object references, and we need to use the mark bit to decide whether a cell is live or not. + + // We haven't implemented for malloc/free cases, for which we do not have mark bit. We could use valid object bit instead. + if cfg!(feature = "malloc_native_mimalloc") { + unimplemented!() + } + + // Check if we can treat it as the simple case: cell address === object reference. + // If the binding does not use allocation offset, and they use the same allocation alignment which the cell size is aligned to, + // then we have cell address === allocation address. + // Furthermore, if the binding does not have an offset between allocation and object reference, then allocation address === cell address. + if !VM::USE_ALLOCATION_OFFSET + && VM::MAX_ALIGNMENT == VM::MIN_ALIGNMENT + && crate::util::conversions::raw_is_aligned( + self.load_block_cell_size(), + VM::MAX_ALIGNMENT, + ) + && VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS + { + // In this case, we can use the simplest and the most efficicent sweep. + self.simple_sweep::() + } else { + // Otherwise we fallback to a generic but slow sweep. This roughly has ~10% mutator overhead for lazy sweeping. + self.naive_brute_force_sweep::() + } + } + + /// This implementation uses object reference and cell address interchangably. This is not correct for most cases. + /// However, in certain cases, such as OpenJDK, this is correct, and efficient. See the sweep method for the invariants + /// that we need to use this method correctly. + fn simple_sweep(&self) { + let cell_size = self.load_block_cell_size(); + let mut cell = self.start(); + let mut last = unsafe { Address::zero() }; + while cell + cell_size <= self.start() + Block::BYTES { + // The invariants we checked earlier ensures that we can use cell and object reference interchangably + // We may not really have an object in this cell, but if we do, this object reference is correct. + let potential_object = ObjectReference::from_raw_address(cell); + + if !VM::VMObjectModel::LOCAL_MARK_BIT_SPEC + .is_marked::(potential_object, Ordering::SeqCst) + { + // clear alloc bit if it is ever set. It is possible that the alloc bit is never set for this cell (i.e. there was no object in this cell before this GC), + // we unset the bit anyway. + #[cfg(feature = "global_alloc_bit")] + crate::util::alloc_bit::unset_alloc_bit_nocheck::(potential_object); + unsafe { + cell.store::
(last); + } + last = cell; + } + cell += cell_size; + } + + self.store_free_list(last); + } + + /// This is a naive implementation that is inefficient but should be correct. + /// In this implementation, we simply go through each possible object + /// reference and see if it has the mark bit set. If we find mark bit, that means the cell is alive. If we didn't find + /// the mark bit in the entire cell, it means the cell is dead. + fn naive_brute_force_sweep(&self) { + use crate::util::constants::MIN_OBJECT_SIZE; + + // Cell size for this block. + let cell_size = self.load_block_cell_size(); + // Current cell + let mut cell = self.start(); + // Last free cell in the free list + let mut last = Address::ZERO; + // Current cursor + let mut cursor = cell; + + debug!("Sweep block {:?}, cell size {}", self, cell_size); + + while cell + cell_size <= self.end() { + // possible object ref + let potential_object_ref = ObjectReference::from_raw_address( + cursor + VM::VMObjectModel::OBJECT_REF_OFFSET_LOWER_BOUND, + ); + trace!( + "{:?}: cell = {}, last cell in free list = {}, cursor = {}, potential object = {}", + self, + cell, + last, + cursor, + potential_object_ref + ); + + if VM::VMObjectModel::LOCAL_MARK_BIT_SPEC + .is_marked::(potential_object_ref, Ordering::SeqCst) + { + debug!("{:?} Live cell: {}", self, cell); + // If the mark bit is set, the cell is alive. + // We directly jump to the end of the cell. + cell += cell_size; + cursor = cell; + } else { + // If the mark bit is not set, we don't know if the cell is alive or not. We keep search for the mark bit. + cursor += MIN_OBJECT_SIZE; + + if cursor >= cell + cell_size { + // We now stepped to the next cell. This means we did not find mark bit in the current cell, and we can add this cell to free list. + debug!( + "{:?} Free cell: {}, last cell in freelist is {}", + self, cell, last + ); + + // Clear alloc bit: we don't know where the object reference actually is, so we bulk zero the cell. + #[cfg(feature = "global_alloc_bit")] + crate::util::alloc_bit::bzero_alloc_bit(cell, cell_size); + + // store the previous cell to make the free list + debug_assert!(last.is_zero() || (last >= self.start() && last < self.end())); + unsafe { + cell.store::
(last); + } + last = cell; + cell += cell_size; + debug_assert_eq!(cursor, cell); + } + } + } + + self.store_free_list(last); + } + + /// Get the chunk containing the block. + #[inline(always)] + pub fn chunk(&self) -> Chunk { + Chunk::from_unaligned_address(self.start()) + } + + /// Initialize a clean block after acquired from page-resource. + #[inline] + pub fn init(&self) { + self.set_state(BlockState::Unmarked); + } + + /// Deinitalize a block before releasing. + #[inline] + pub fn deinit(&self) { + self.set_state(BlockState::Unallocated); + } +} + +/// The block allocation state. +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum BlockState { + /// the block is not allocated. + Unallocated, + /// the block is allocated but not marked. + Unmarked, + /// the block is allocated and marked. + Marked, +} + +impl BlockState { + /// Private constant + const MARK_UNALLOCATED: u8 = 0; + /// Private constant + const MARK_UNMARKED: u8 = u8::MAX; + /// Private constant + const MARK_MARKED: u8 = u8::MAX - 1; +} + +impl From for BlockState { + #[inline(always)] + fn from(state: u8) -> Self { + match state { + Self::MARK_UNALLOCATED => BlockState::Unallocated, + Self::MARK_UNMARKED => BlockState::Unmarked, + Self::MARK_MARKED => BlockState::Marked, + _ => unreachable!(), + } + } +} + +impl From for u8 { + #[inline(always)] + fn from(state: BlockState) -> Self { + match state { + BlockState::Unallocated => BlockState::MARK_UNALLOCATED, + BlockState::Unmarked => BlockState::MARK_UNMARKED, + BlockState::Marked => BlockState::MARK_MARKED, + } + } +} diff --git a/src/policy/marksweepspace/native_ms/block_list.rs b/src/policy/marksweepspace/native_ms/block_list.rs new file mode 100644 index 0000000000..8e6b6a33f3 --- /dev/null +++ b/src/policy/marksweepspace/native_ms/block_list.rs @@ -0,0 +1,322 @@ +use super::Block; +use crate::util::alloc::allocator; +use crate::util::linear_scan::Region; +use crate::vm::VMBinding; +use std::sync::atomic::AtomicBool; +use std::sync::atomic::Ordering; + +/// List of blocks owned by the allocator +#[derive(Debug)] +#[repr(C)] +pub struct BlockList { + pub first: Option, + pub last: Option, + pub size: usize, + pub lock: AtomicBool, +} + +impl BlockList { + const fn new(size: usize) -> BlockList { + BlockList { + first: None, + last: None, + size, + lock: AtomicBool::new(false), + } + } + + /// List has no blocks + pub fn is_empty(&self) -> bool { + self.first.is_none() + } + + /// Remove a block from the list + pub fn remove(&mut self, block: Block) { + match (block.load_prev_block(), block.load_next_block()) { + (None, None) => { + self.first = None; + self.last = None; + } + (None, Some(next)) => { + next.clear_prev_block(); + self.first = Some(next); + next.store_block_list(self); + } + (Some(prev), None) => { + prev.clear_next_block(); + self.last = Some(prev); + prev.store_block_list(self); + } + (Some(prev), Some(next)) => { + prev.store_next_block(next); + next.store_prev_block(prev); + } + } + } + + /// Pop the first block in the list + pub fn pop(&mut self) -> Option { + if let Some(head) = self.first { + if let Some(next) = head.load_next_block() { + self.first = Some(next); + next.clear_prev_block(); + next.store_block_list(self); + } else { + self.first = None; + self.last = None; + } + head.clear_next_block(); + head.clear_prev_block(); + Some(head) + } else { + None + } + } + + /// Push block to the front of the list + pub fn push(&mut self, block: Block) { + if self.is_empty() { + block.clear_next_block(); + block.clear_prev_block(); + self.first = 
Some(block); + self.last = Some(block); + } else { + let self_head = self.first.unwrap(); + block.store_next_block(self_head); + self_head.store_prev_block(block); + block.clear_prev_block(); + self.first = Some(block); + } + block.store_block_list(self); + } + + /// Moves all the blocks of `other` into `self`, leaving `other` empty. + pub fn append(&mut self, other: &mut BlockList) { + debug_assert_eq!(self.size, other.size); + if !other.is_empty() { + debug_assert!( + other.first.unwrap().load_prev_block().is_none(), + "The other list's head has prev block: prev{} -> head{}", + other.first.unwrap().load_prev_block().unwrap().start(), + other.first.unwrap().start() + ); + if self.is_empty() { + self.first = other.first; + self.last = other.last; + } else { + debug_assert!( + self.first.unwrap().load_prev_block().is_none(), + "Current list's head has prev block: prev{} -> head{}", + self.first.unwrap().load_prev_block().unwrap().start(), + self.first.unwrap().start() + ); + let self_tail = self.last.unwrap(); + let other_head = other.first.unwrap(); + self_tail.store_next_block(other_head); + other_head.store_prev_block(self_tail); + self.last = other.last; + } + let mut cursor = other.first; + while let Some(block) = cursor { + block.store_block_list(self); + cursor = block.load_next_block(); + } + other.reset(); + } + } + + /// Remove all blocks + fn reset(&mut self) { + self.first = None; + self.last = None; + } + + /// Lock the list. The MiMalloc allocator mostly uses thread-local block lists, and those operations on the list + /// do not need synchronisation. However, in cases where a block list may be accessed by multiple threads, we need + /// to lock the list before accessing it. + /// + /// Our current sole use for locking is parallel sweeping. During the Release phase, multiple GC worker threads can + /// sweep chunks and release mutators at the same time, and the same `BlockList` can be reached by traversing blocks in a chunk, + /// and also by traversing blocks held by a mutator. This lock is necessary to prevent + /// multiple GC workers from mutating the same `BlockList` instance. + pub fn lock(&mut self) { + let mut success = false; + while !success { + success = self + .lock + .compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst) + .is_ok(); + } + } + + /// Unlock list. See the comments on the lock method. + pub fn unlock(&mut self) { + self.lock.store(false, Ordering::SeqCst); + } +} + +/// Log2 of pointer size +const MI_INTPTR_SHIFT: usize = crate::util::constants::LOG_BYTES_IN_ADDRESS as usize; +/// pointer size in bytes +const MI_INTPTR_SIZE: usize = 1 << MI_INTPTR_SHIFT; +/// pointer size in bits +const MI_INTPTR_BITS: usize = MI_INTPTR_SIZE * 8; +/// Number of bins in BlockLists. Reserve bin0 as an empty bin. +pub(crate) const MI_BIN_FULL: usize = MAX_BIN + 1; +/// The largest valid bin. +pub(crate) const MAX_BIN: usize = 48; + +/// Largest object size allowed with our mimalloc implementation, in bytes +pub(crate) const MI_LARGE_OBJ_SIZE_MAX: usize = MAX_BIN_SIZE; +/// Largest object size in words +const MI_LARGE_OBJ_WSIZE_MAX: usize = MI_LARGE_OBJ_SIZE_MAX / MI_INTPTR_SIZE; +/// The object size for the last bin. We should not try allocate objects larger than this with the allocator. +pub(crate) const MAX_BIN_SIZE: usize = 8192 * MI_INTPTR_SIZE; + +/// All the bins for the block lists +// Each block list takes roughly 8bytes * 4 * 49 = 1658 bytes. It is more reasonable to heap allocate them, and +// just put them behind a boxed pointer. 
+pub type BlockLists = Box<[BlockList; MAX_BIN + 1]>; + +/// Create an empty set of block lists of different size classes (bins) +pub(crate) fn new_empty_block_lists() -> BlockLists { + let ret = Box::new([ + BlockList::new(MI_INTPTR_SIZE), + BlockList::new(MI_INTPTR_SIZE), + BlockList::new(2 * MI_INTPTR_SIZE), + BlockList::new(3 * MI_INTPTR_SIZE), + BlockList::new(4 * MI_INTPTR_SIZE), + BlockList::new(5 * MI_INTPTR_SIZE), + BlockList::new(6 * MI_INTPTR_SIZE), + BlockList::new(7 * MI_INTPTR_SIZE), + BlockList::new(8 * MI_INTPTR_SIZE), /* 8 */ + BlockList::new(10 * MI_INTPTR_SIZE), + BlockList::new(12 * MI_INTPTR_SIZE), + BlockList::new(14 * MI_INTPTR_SIZE), + BlockList::new(16 * MI_INTPTR_SIZE), + BlockList::new(20 * MI_INTPTR_SIZE), + BlockList::new(24 * MI_INTPTR_SIZE), + BlockList::new(28 * MI_INTPTR_SIZE), + BlockList::new(32 * MI_INTPTR_SIZE), /* 16 */ + BlockList::new(40 * MI_INTPTR_SIZE), + BlockList::new(48 * MI_INTPTR_SIZE), + BlockList::new(56 * MI_INTPTR_SIZE), + BlockList::new(64 * MI_INTPTR_SIZE), + BlockList::new(80 * MI_INTPTR_SIZE), + BlockList::new(96 * MI_INTPTR_SIZE), + BlockList::new(112 * MI_INTPTR_SIZE), + BlockList::new(128 * MI_INTPTR_SIZE), /* 24 */ + BlockList::new(160 * MI_INTPTR_SIZE), + BlockList::new(192 * MI_INTPTR_SIZE), + BlockList::new(224 * MI_INTPTR_SIZE), + BlockList::new(256 * MI_INTPTR_SIZE), + BlockList::new(320 * MI_INTPTR_SIZE), + BlockList::new(384 * MI_INTPTR_SIZE), + BlockList::new(448 * MI_INTPTR_SIZE), + BlockList::new(512 * MI_INTPTR_SIZE), /* 32 */ + BlockList::new(640 * MI_INTPTR_SIZE), + BlockList::new(768 * MI_INTPTR_SIZE), + BlockList::new(896 * MI_INTPTR_SIZE), + BlockList::new(1024 * MI_INTPTR_SIZE), + BlockList::new(1280 * MI_INTPTR_SIZE), + BlockList::new(1536 * MI_INTPTR_SIZE), + BlockList::new(1792 * MI_INTPTR_SIZE), + BlockList::new(2048 * MI_INTPTR_SIZE), /* 40 */ + BlockList::new(2560 * MI_INTPTR_SIZE), + BlockList::new(3072 * MI_INTPTR_SIZE), + BlockList::new(3584 * MI_INTPTR_SIZE), + BlockList::new(4096 * MI_INTPTR_SIZE), + BlockList::new(5120 * MI_INTPTR_SIZE), + BlockList::new(6144 * MI_INTPTR_SIZE), + BlockList::new(7168 * MI_INTPTR_SIZE), + BlockList::new(8192 * MI_INTPTR_SIZE), /* 48 */ + ]); + + debug_assert_eq!( + ret[MAX_BIN].size, MAX_BIN_SIZE, + "MAX_BIN_SIZE = {}, actual max bin size = {}, please update the constants", + MAX_BIN_SIZE, ret[MAX_BIN].size + ); + + ret +} + +/// Returns how many pages the block lists uses. +#[allow(unused)] +pub(crate) fn pages_used_by_blocklists(lists: &BlockLists) -> usize { + let mut pages = 0; + for bin in 1..=MAX_BIN { + let list = &lists[bin]; + + // walk the blocks + let mut cursor = list.first; + while let Some(block) = cursor { + pages += Block::BYTES >> crate::util::constants::LOG_BYTES_IN_PAGE; + cursor = block.load_next_block(); + } + } + + pages +} + +/// Align a byte size to a size in machine words +/// i.e. 
byte size == `wsize*sizeof(void*)` +/// adapted from _mi_wsize_from_size in mimalloc +fn mi_wsize_from_size(size: usize) -> usize { + (size + MI_INTPTR_SIZE - 1) / MI_INTPTR_SIZE +} + +pub fn mi_bin(size: usize, align: usize) -> usize { + let size = allocator::get_maximum_aligned_size::(size, align); + mi_bin_from_size(size) +} + +fn mi_bin_from_size(size: usize) -> usize { + // adapted from _mi_bin in mimalloc + let mut wsize: usize = mi_wsize_from_size(size); + debug_assert!(wsize <= MI_LARGE_OBJ_WSIZE_MAX); + let bin: u8; + if wsize <= 1 { + bin = 1; + } else if wsize <= 8 { + bin = wsize as u8; + // bin = ((wsize + 1) & !1) as u8; // round to double word sizes + } else { + wsize -= 1; + let b = (MI_INTPTR_BITS - 1 - usize::leading_zeros(wsize) as usize) as u8; // note: wsize != 0 + bin = ((b << 2) + ((wsize >> (b - 2)) & 0x03) as u8) - 3; + } + bin as usize +} + +#[cfg(test)] +mod tests { + use super::*; + + fn get_bin_size_range(bin: usize, bins: &BlockLists) -> Option<(usize, usize)> { + if bin == 0 || bin > MAX_BIN { + None + } else if bin == 1 { + Some((0, bins[1].size)) + } else { + Some((bins[bin - 1].size, bins[bin].size)) + } + } + + #[test] + fn test_mi_bin() { + let block_lists = new_empty_block_lists(); + for size in 0..=MAX_BIN_SIZE { + let bin = mi_bin_from_size(size); + let bin_range = get_bin_size_range(bin, &block_lists); + assert!(bin_range.is_some(), "Invalid bin {} for size {}", bin, size); + assert!( + size >= bin_range.unwrap().0 && bin < bin_range.unwrap().1, + "Assigning size={} to bin={} ({:?}) incorrect", + size, + bin, + bin_range.unwrap() + ); + } + } +} diff --git a/src/policy/marksweepspace/native_ms/global.rs b/src/policy/marksweepspace/native_ms/global.rs new file mode 100644 index 0000000000..fb802c17f8 --- /dev/null +++ b/src/policy/marksweepspace/native_ms/global.rs @@ -0,0 +1,375 @@ +use std::sync::Arc; + +use atomic::Ordering; + +use crate::{ + policy::{marksweepspace::native_ms::*, sft::GCWorkerMutRef, space::SpaceOptions}, + scheduler::{GCWorkScheduler, GCWorker}, + util::{ + copy::CopySemantics, + heap::{ + layout::heap_layout::{Mmapper, VMMap}, + FreeListPageResource, HeapMeta, VMRequest, + }, + metadata::{ + self, + side_metadata::{SideMetadataContext, SideMetadataSpec}, + MetadataSpec, + }, + ObjectReference, + }, + vm::VMBinding, +}; + +#[cfg(feature = "is_mmtk_object")] +use crate::util::Address; + +use crate::plan::ObjectQueue; +use crate::plan::VectorObjectQueue; +use crate::policy::sft::SFT; +use crate::policy::space::{CommonSpace, Space}; +use crate::util::constants::LOG_BYTES_IN_PAGE; +use crate::util::heap::chunk_map::*; +use crate::util::linear_scan::Region; +use crate::util::VMThread; +use crate::vm::ObjectModel; +use std::sync::Mutex; + +/// The result for `MarkSweepSpace.acquire_block()`. `MarkSweepSpace` will attempt +/// to allocate from abandoned blocks first. If none found, it will get a new block +/// from the page resource. +pub enum BlockAcquireResult { + Exhausted, + /// A new block we just acquired from the page resource + Fresh(Block), + /// An available block. The block can be directly used if there is any free cell in it. + AbandonedAvailable(Block), + /// An unswept block. The block needs to be swept first before it can be used. + AbandonedUnswept(Block), +} + +/// A mark sweep space. +pub struct MarkSweepSpace { + pub common: CommonSpace, + pr: FreeListPageResource, + /// Allocation status for all chunks in MS space + pub chunk_map: ChunkMap, + /// Work packet scheduler + scheduler: Arc>, + /// Abandoned blocks. 
If a mutator dies, all its blocks go to this abandoned block + /// lists. In a GC, we also 'flush' all the local blocks to this global pool so they + /// can be used by allocators from other threads. + pub abandoned: Mutex, +} + +pub struct AbandonedBlockLists { + pub available: BlockLists, + pub unswept: BlockLists, + pub consumed: BlockLists, +} + +impl AbandonedBlockLists { + fn move_consumed_to_unswept(&mut self) { + let mut i = 0; + while i < MI_BIN_FULL { + if !self.consumed[i].is_empty() { + self.unswept[i].append(&mut self.consumed[i]); + } + i += 1; + } + } +} + +impl SFT for MarkSweepSpace { + fn name(&self) -> &str { + self.common.name + } + + fn is_live(&self, object: crate::util::ObjectReference) -> bool { + VM::VMObjectModel::LOCAL_MARK_BIT_SPEC.is_marked::(object, Ordering::SeqCst) + } + + fn is_movable(&self) -> bool { + false + } + + #[cfg(feature = "sanity")] + fn is_sane(&self) -> bool { + true + } + + fn initialize_object_metadata(&self, _object: crate::util::ObjectReference, _alloc: bool) { + #[cfg(feature = "global_alloc_bit")] + crate::util::alloc_bit::set_alloc_bit::(_object); + } + + #[cfg(feature = "is_mmtk_object")] + #[inline(always)] + fn is_mmtk_object(&self, addr: Address) -> bool { + crate::util::alloc_bit::is_alloced_object::(addr).is_some() + } + + fn sft_trace_object( + &self, + queue: &mut VectorObjectQueue, + object: ObjectReference, + _worker: GCWorkerMutRef, + ) -> ObjectReference { + self.trace_object(queue, object) + } +} + +impl Space for MarkSweepSpace { + fn as_space(&self) -> &dyn Space { + self + } + + fn as_sft(&self) -> &(dyn SFT + Sync + 'static) { + self + } + + fn get_page_resource(&self) -> &dyn crate::util::heap::PageResource { + &self.pr + } + + fn initialize_sft(&self) { + self.common().initialize_sft(self.as_sft()) + } + + fn common(&self) -> &CommonSpace { + &self.common + } + + fn release_multiple_pages(&mut self, _start: crate::util::Address) { + todo!() + } +} + +impl crate::policy::gc_work::PolicyTraceObject for MarkSweepSpace { + fn trace_object( + &self, + queue: &mut Q, + object: ObjectReference, + _copy: Option, + _worker: &mut GCWorker, + ) -> ObjectReference { + self.trace_object(queue, object) + } + + fn may_move_objects() -> bool { + false + } +} + +// We cannot allocate objects that are larger than the max bin size. +#[allow(dead_code)] +pub const MAX_OBJECT_SIZE: usize = crate::policy::marksweepspace::native_ms::MI_LARGE_OBJ_SIZE_MAX; + +impl MarkSweepSpace { + pub fn extend_global_side_metadata_specs(_specs: &mut Vec) { + // MarkSweepSpace does not need any special global specs. This method exists, as + // we need this method for MallocSpace, and we want those two spaces to be used interchangably. 
+ } + + #[allow(clippy::too_many_arguments)] + pub fn new( + name: &'static str, + zeroed: bool, + vmrequest: VMRequest, + global_side_metadata_specs: Vec, + vm_map: &'static VMMap, + mmapper: &'static Mmapper, + heap: &mut HeapMeta, + scheduler: Arc>, + ) -> MarkSweepSpace { + let local_specs = { + metadata::extract_side_metadata(&vec![ + MetadataSpec::OnSide(Block::NEXT_BLOCK_TABLE), + MetadataSpec::OnSide(Block::PREV_BLOCK_TABLE), + MetadataSpec::OnSide(Block::FREE_LIST_TABLE), + MetadataSpec::OnSide(Block::SIZE_TABLE), + #[cfg(feature = "malloc_native_mimalloc")] + MetadataSpec::OnSide(Block::LOCAL_FREE_LIST_TABLE), + #[cfg(feature = "malloc_native_mimalloc")] + MetadataSpec::OnSide(Block::THREAD_FREE_LIST_TABLE), + MetadataSpec::OnSide(Block::BLOCK_LIST_TABLE), + MetadataSpec::OnSide(Block::TLS_TABLE), + MetadataSpec::OnSide(Block::MARK_TABLE), + MetadataSpec::OnSide(ChunkMap::ALLOC_TABLE), + *VM::VMObjectModel::LOCAL_MARK_BIT_SPEC, + ]) + }; + + let common = CommonSpace::new( + SpaceOptions { + name, + movable: false, + immortal: false, + needs_log_bit: false, + zeroed, + vmrequest, + side_metadata_specs: SideMetadataContext { + global: global_side_metadata_specs, + local: local_specs, + }, + }, + vm_map, + mmapper, + heap, + ); + MarkSweepSpace { + pr: if vmrequest.is_discontiguous() { + FreeListPageResource::new_discontiguous(vm_map) + } else { + FreeListPageResource::new_contiguous(common.start, common.extent, vm_map) + }, + common, + chunk_map: ChunkMap::new(), + scheduler, + abandoned: Mutex::new(AbandonedBlockLists { + available: new_empty_block_lists(), + unswept: new_empty_block_lists(), + consumed: new_empty_block_lists(), + }), + } + } + + fn trace_object( + &self, + queue: &mut Q, + object: ObjectReference, + ) -> ObjectReference { + if object.is_null() { + return object; + } + debug_assert!( + self.in_space(object), + "Cannot mark an object {} that was not alloced by free list allocator.", + object, + ); + if !VM::VMObjectModel::LOCAL_MARK_BIT_SPEC.is_marked::(object, Ordering::SeqCst) { + VM::VMObjectModel::LOCAL_MARK_BIT_SPEC.mark::(object, Ordering::SeqCst); + let block = Block::containing::(object); + block.set_state(BlockState::Marked); + queue.enqueue(object); + } + object + } + + pub fn record_new_block(&self, block: Block) { + block.init(); + self.chunk_map.set(block.chunk(), ChunkState::Allocated); + } + + #[inline] + pub fn get_next_metadata_spec(&self) -> SideMetadataSpec { + Block::NEXT_BLOCK_TABLE + } + + pub fn prepare(&mut self) { + if let MetadataSpec::OnSide(side) = *VM::VMObjectModel::LOCAL_MARK_BIT_SPEC { + for chunk in self.chunk_map.all_chunks() { + side.bzero_metadata(chunk.start(), Chunk::BYTES); + } + } else { + unimplemented!("in header mark bit is not supported"); + } + } + + pub fn release(&mut self) { + // We sweep and release unmarked blocks here. For sweeping cells inside each block, we either + // do that when we release mutators (eager sweeping), or do that at allocation time (lazy sweeping). + use crate::scheduler::WorkBucketStage; + let work_packets = self.generate_sweep_tasks(); + self.scheduler.work_buckets[WorkBucketStage::Release].bulk_add(work_packets); + + let mut abandoned = self.abandoned.lock().unwrap(); + abandoned.move_consumed_to_unswept(); + } + + /// Release a block. 
+ pub fn release_block(&self, block: Block) { + self.block_clear_metadata(block); + + block.deinit(); + self.pr.release_pages(block.start()); + } + + pub fn block_clear_metadata(&self, block: Block) { + for metadata_spec in Block::METADATA_SPECS { + metadata_spec.set_zero_atomic(block.start(), Ordering::SeqCst); + } + #[cfg(feature = "global_alloc_bit")] + crate::util::alloc_bit::bzero_alloc_bit(block.start(), Block::BYTES); + } + + pub fn acquire_block(&self, tls: VMThread, size: usize, align: usize) -> BlockAcquireResult { + { + let mut abandoned = self.abandoned.lock().unwrap(); + let bin = mi_bin::(size, align); + + { + let abandoned_available = &mut abandoned.available; + if !abandoned_available[bin].is_empty() { + let block = abandoned_available[bin].pop().unwrap(); + return BlockAcquireResult::AbandonedAvailable(block); + } + } + + { + let abandoned_unswept = &mut abandoned.unswept; + if !abandoned_unswept[bin].is_empty() { + let block = abandoned_unswept[bin].pop().unwrap(); + return BlockAcquireResult::AbandonedUnswept(block); + } + } + } + + let acquired = self.acquire(tls, Block::BYTES >> LOG_BYTES_IN_PAGE); + if acquired.is_zero() { + BlockAcquireResult::Exhausted + } else { + BlockAcquireResult::Fresh(Block::from_unaligned_address(acquired)) + } + } + + pub fn generate_sweep_tasks(&self) -> Vec>> { + // # Safety: ImmixSpace reference is always valid within this collection cycle. + let space = unsafe { &*(self as *const Self) }; + self.chunk_map + .generate_tasks(|chunk| Box::new(SweepChunk { space, chunk })) + } +} + +use crate::scheduler::GCWork; +use crate::MMTK; + +/// Chunk sweeping work packet. +struct SweepChunk { + space: &'static MarkSweepSpace, + chunk: Chunk, +} + +impl GCWork for SweepChunk { + #[inline] + fn do_work(&mut self, _worker: &mut GCWorker, _mmtk: &'static MMTK) { + debug_assert!(self.space.chunk_map.get(self.chunk) == ChunkState::Allocated); + // number of allocated blocks. + let mut allocated_blocks = 0; + // Iterate over all allocated blocks in this chunk. + for block in self + .chunk + .iter_region::() + .filter(|block| block.get_state() != BlockState::Unallocated) + { + if !block.attempt_release(self.space) { + // Block is live. Increment the allocated block count. + allocated_blocks += 1; + } + } + // Set this chunk as free if there is not live blocks. 
+ if allocated_blocks == 0 { + self.space.chunk_map.set(self.chunk, ChunkState::Free) + } + } +} diff --git a/src/policy/marksweepspace/native_ms/mod.rs b/src/policy/marksweepspace/native_ms/mod.rs new file mode 100644 index 0000000000..d0cd59428b --- /dev/null +++ b/src/policy/marksweepspace/native_ms/mod.rs @@ -0,0 +1,7 @@ +mod block; +mod block_list; +mod global; + +pub use block::*; +pub use block_list::*; +pub use global::*; diff --git a/src/policy/mod.rs b/src/policy/mod.rs index 51bb5c0adb..2e459db095 100644 --- a/src/policy/mod.rs +++ b/src/policy/mod.rs @@ -25,5 +25,5 @@ pub mod immix; pub mod immortalspace; pub mod largeobjectspace; pub mod lockfreeimmortalspace; -pub mod mallocspace; pub mod markcompactspace; +pub mod marksweepspace; diff --git a/src/util/address.rs b/src/util/address.rs index 16369c42ac..67549b5a29 100644 --- a/src/util/address.rs +++ b/src/util/address.rs @@ -501,7 +501,9 @@ impl ObjectReference { #[inline(always)] pub fn to_address(self) -> Address { use crate::vm::ObjectModel; - VM::VMObjectModel::ref_to_address(self) + let to_address = VM::VMObjectModel::ref_to_address(self); + debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || to_address == self.to_raw_address(), "The binding claims unified object reference address, but for object reference {}, ref_to_address() returns {}", self, to_address); + to_address } /// Get the header base address from an object reference. This method is used by MMTk to get a base address for the @@ -513,13 +515,23 @@ impl ObjectReference { VM::VMObjectModel::ref_to_header(self) } + #[inline(always)] + pub fn to_object_start(self) -> Address { + use crate::vm::ObjectModel; + let object_start = VM::VMObjectModel::ref_to_object_start(self); + debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || object_start == self.to_raw_address(), "The binding claims unified object reference address, but for object reference {}, ref_to_address() returns {}", self, object_start); + object_start + } + /// Get the object reference from an address that is returned from [`crate::util::address::ObjectReference::to_address`] /// or [`crate::vm::ObjectModel::ref_to_address`]. This method is syntactic sugar for [`crate::vm::ObjectModel::address_to_ref`]. /// See the comments on [`crate::vm::ObjectModel::address_to_ref`]. #[inline(always)] pub fn from_address(addr: Address) -> ObjectReference { use crate::vm::ObjectModel; - VM::VMObjectModel::address_to_ref(addr) + let obj = VM::VMObjectModel::address_to_ref(addr); + debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || addr == obj.to_raw_address(), "The binding claims unified object reference address, but for address {}, address_to_ref() returns {}", addr, obj); + obj } /// is this object reference null reference? 
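The `acquire_block` path in the `MarkSweepSpace` hunk above follows a fixed reuse order: abandoned blocks that still have free cells are handed out first, then abandoned blocks that still need sweeping, and only then is a fresh block requested from the page resource, with `Exhausted` telling the caller to poll for GC. The standalone sketch below restates that order with simplified stand-ins; `Pool`, `Acquire` and the `Vec`-backed lists are illustrative assumptions, not MMTk APIs, and a single LIFO list replaces the per-size-class bins.

// Minimal sketch of the acquisition order implemented by MarkSweepSpace::acquire_block.
// `Pool`, `Block` and `Acquire` are hypothetical stand-ins for illustration, not MMTk types.

#[derive(Debug, Clone, Copy, PartialEq)]
struct Block(usize);

#[derive(Debug, PartialEq)]
enum Acquire {
    Exhausted,
    Fresh(Block),
    AbandonedAvailable(Block),
    AbandonedUnswept(Block),
}

struct Pool {
    available: Vec<Block>, // abandoned blocks that still contain free cells
    unswept: Vec<Block>,   // abandoned blocks that must be swept before reuse
    fresh: Vec<Block>,     // stand-in for the page resource
}

impl Pool {
    fn acquire(&mut self) -> Acquire {
        // 1. Prefer abandoned blocks that are immediately usable.
        if let Some(b) = self.available.pop() {
            return Acquire::AbandonedAvailable(b);
        }
        // 2. Then take abandoned blocks that still need a sweep.
        if let Some(b) = self.unswept.pop() {
            return Acquire::AbandonedUnswept(b);
        }
        // 3. Only then grow the heap: ask the page resource for a fresh block.
        match self.fresh.pop() {
            Some(b) => Acquire::Fresh(b),
            None => Acquire::Exhausted, // caller would poll for GC here
        }
    }
}

fn main() {
    let mut pool = Pool {
        available: vec![],
        unswept: vec![Block(2)],
        fresh: vec![Block(3)],
    };
    // With no immediately available blocks, an unswept abandoned block comes first.
    assert_eq!(pool.acquire(), Acquire::AbandonedUnswept(Block(2)));
    assert_eq!(pool.acquire(), Acquire::Fresh(Block(3)));
    assert_eq!(pool.acquire(), Acquire::Exhausted);
}

Draining abandoned blocks before touching the page resource lets blocks freed up by dead mutators, or flushed during `release()`, be reused instead of growing the heap.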
diff --git a/src/util/alloc/allocator.rs b/src/util/alloc/allocator.rs index 7f35496aa7..10f3def182 100644 --- a/src/util/alloc/allocator.rs +++ b/src/util/alloc/allocator.rs @@ -26,7 +26,7 @@ pub fn align_allocation_no_fill( alignment: usize, offset: isize, ) -> Address { - align_allocation::(region, alignment, offset, VM::MIN_ALIGNMENT, false) + align_allocation_inner::(region, alignment, offset, VM::MIN_ALIGNMENT, false) } #[inline(always)] @@ -34,6 +34,15 @@ pub fn align_allocation( region: Address, alignment: usize, offset: isize, +) -> Address { + align_allocation_inner::(region, alignment, offset, VM::MIN_ALIGNMENT, true) +} + +#[inline(always)] +pub fn align_allocation_inner( + region: Address, + alignment: usize, + offset: isize, known_alignment: usize, fillalignmentgap: bool, ) -> Address { @@ -91,7 +100,12 @@ pub fn fill_alignment_gap(immut_start: Address, end: Address) { } #[inline(always)] -pub fn get_maximum_aligned_size( +pub fn get_maximum_aligned_size(size: usize, alignment: usize) -> usize { + get_maximum_aligned_size_inner::(size, alignment, VM::MIN_ALIGNMENT) +} + +#[inline(always)] +pub fn get_maximum_aligned_size_inner( size: usize, alignment: usize, known_alignment: usize, @@ -352,6 +366,12 @@ pub trait Allocator: Downcast { } self.alloc_slow_once(size, align, offset) } + + /// The [`crate::plan::Mutator`] that includes this allocator is going to be destroyed. Some allocators + /// may need to save/transfer its thread local data to the space. + fn on_mutator_destroy(&mut self) { + // By default, do nothing + } } impl_downcast!(Allocator where VM: VMBinding); diff --git a/src/util/alloc/allocators.rs b/src/util/alloc/allocators.rs index 2e3294d31c..209cd5b330 100644 --- a/src/util/alloc/allocators.rs +++ b/src/util/alloc/allocators.rs @@ -2,7 +2,8 @@ use std::mem::MaybeUninit; use crate::plan::Plan; use crate::policy::largeobjectspace::LargeObjectSpace; -use crate::policy::mallocspace::MallocSpace; +use crate::policy::marksweepspace::malloc_ms::MallocSpace; +use crate::policy::marksweepspace::native_ms::MarkSweepSpace; use crate::policy::space::Space; use crate::util::alloc::LargeObjectAllocator; use crate::util::alloc::MallocAllocator; @@ -10,12 +11,14 @@ use crate::util::alloc::{Allocator, BumpAllocator, ImmixAllocator}; use crate::util::VMMutatorThread; use crate::vm::VMBinding; +use super::FreeListAllocator; use super::MarkCompactAllocator; pub(crate) const MAX_BUMP_ALLOCATORS: usize = 6; pub(crate) const MAX_LARGE_OBJECT_ALLOCATORS: usize = 2; pub(crate) const MAX_MALLOC_ALLOCATORS: usize = 1; pub(crate) const MAX_IMMIX_ALLOCATORS: usize = 1; +pub(crate) const MAX_FREE_LIST_ALLOCATORS: usize = 2; pub(crate) const MAX_MARK_COMPACT_ALLOCATORS: usize = 1; // The allocators set owned by each mutator. 
We provide a fixed number of allocators for each allocator type in the mutator, @@ -28,6 +31,7 @@ pub struct Allocators { pub large_object: [MaybeUninit>; MAX_LARGE_OBJECT_ALLOCATORS], pub malloc: [MaybeUninit>; MAX_MALLOC_ALLOCATORS], pub immix: [MaybeUninit>; MAX_IMMIX_ALLOCATORS], + pub free_list: [MaybeUninit>; MAX_FREE_LIST_ALLOCATORS], pub markcompact: [MaybeUninit>; MAX_MARK_COMPACT_ALLOCATORS], } @@ -44,6 +48,7 @@ impl Allocators { } AllocatorSelector::Malloc(index) => self.malloc[index as usize].assume_init_ref(), AllocatorSelector::Immix(index) => self.immix[index as usize].assume_init_ref(), + AllocatorSelector::FreeList(index) => self.free_list[index as usize].assume_init_ref(), AllocatorSelector::MarkCompact(index) => { self.markcompact[index as usize].assume_init_ref() } @@ -66,6 +71,7 @@ impl Allocators { } AllocatorSelector::Malloc(index) => self.malloc[index as usize].assume_init_mut(), AllocatorSelector::Immix(index) => self.immix[index as usize].assume_init_mut(), + AllocatorSelector::FreeList(index) => self.free_list[index as usize].assume_init_mut(), AllocatorSelector::MarkCompact(index) => { self.markcompact[index as usize].assume_init_mut() } @@ -83,6 +89,7 @@ impl Allocators { large_object: unsafe { MaybeUninit::uninit().assume_init() }, malloc: unsafe { MaybeUninit::uninit().assume_init() }, immix: unsafe { MaybeUninit::uninit().assume_init() }, + free_list: unsafe { MaybeUninit::uninit().assume_init() }, markcompact: unsafe { MaybeUninit::uninit().assume_init() }, }; @@ -117,6 +124,13 @@ impl Allocators { false, )); } + AllocatorSelector::FreeList(index) => { + ret.free_list[index as usize].write(FreeListAllocator::new( + mutator_tls.0, + space.downcast_ref::>().unwrap(), + plan, + )); + } AllocatorSelector::MarkCompact(index) => { ret.markcompact[index as usize].write(MarkCompactAllocator::new( mutator_tls.0, @@ -145,13 +159,14 @@ impl Allocators { // LargeObject, // } #[repr(C, u8)] -#[derive(Copy, Clone, Debug, PartialEq)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub enum AllocatorSelector { BumpPointer(u8), LargeObject(u8), Malloc(u8), Immix(u8), MarkCompact(u8), + FreeList(u8), None, } diff --git a/src/util/alloc/bumpallocator.rs b/src/util/alloc/bumpallocator.rs index 93fda5c3b7..c6731bfac3 100644 --- a/src/util/alloc/bumpallocator.rs +++ b/src/util/alloc/bumpallocator.rs @@ -28,8 +28,8 @@ pub struct BumpAllocator { } impl BumpAllocator { - pub fn set_limit(&mut self, cursor: Address, limit: Address) { - self.cursor = cursor; + pub fn set_limit(&mut self, start: Address, limit: Address) { + self.cursor = start; self.limit = limit; } diff --git a/src/util/alloc/free_list_allocator.rs b/src/util/alloc/free_list_allocator.rs new file mode 100644 index 0000000000..9e47065a3b --- /dev/null +++ b/src/util/alloc/free_list_allocator.rs @@ -0,0 +1,532 @@ +// This is a free list allocator written based on Microsoft's mimalloc allocator https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action/ + +use crate::policy::marksweepspace::native_ms::*; +use crate::util::alloc::allocator; +use crate::util::alloc::Allocator; +use crate::util::linear_scan::Region; +use crate::util::Address; +use crate::util::VMThread; +use crate::vm::VMBinding; +use crate::Plan; + +/// A MiMalloc free list allocator +#[repr(C)] +pub struct FreeListAllocator { + pub tls: VMThread, + space: &'static MarkSweepSpace, + plan: &'static dyn Plan, + /// blocks with free space + pub available_blocks: BlockLists, + /// blocks with free space for 
precise stress GC + /// For precise stress GC, we need to be able to trigger slowpath allocation for + /// each allocation. To achieve this, we put available blocks to this list. So + /// normal fastpath allocation will fail, as they will see the block lists + /// as empty. + pub available_blocks_stress: BlockLists, + /// blocks that are marked, not swept + pub unswept_blocks: BlockLists, + /// full blocks + pub consumed_blocks: BlockLists, +} + +impl Allocator for FreeListAllocator { + fn get_tls(&self) -> VMThread { + self.tls + } + + fn get_space(&self) -> &'static dyn crate::policy::space::Space { + self.space + } + + fn get_plan(&self) -> &'static dyn Plan { + self.plan + } + + // Find a block with free space and allocate to it + fn alloc(&mut self, size: usize, align: usize, offset: isize) -> Address { + debug_assert!( + size <= MAX_BIN_SIZE, + "Alloc request for {} bytes is too big.", + size + ); + debug_assert!(align <= VM::MAX_ALIGNMENT); + debug_assert!(align >= VM::MIN_ALIGNMENT); + + if let Some(block) = self.find_free_block_local(size, align) { + let cell = self.block_alloc(block); + if !cell.is_zero() { + // We succeeded in fastpath alloc, this cannot be precise stress test + debug_assert!( + !(*self.plan.options().precise_stress + && self.plan.base().is_stress_test_gc_enabled()) + ); + + let res = allocator::align_allocation::(cell, align, offset); + // Make sure that the allocation region is within the cell + #[cfg(debug_assertions)] + { + let cell_size = block.load_block_cell_size(); + debug_assert!( + res + size <= cell + cell_size, + "Allocating (size = {}, align = {}, offset = {}) to the cell {} of size {}, but the end of the allocation region {} is beyond the cell end {}", + size, align, offset, cell, cell_size, res + size, cell + cell_size + ); + } + return res; + } + } + + self.alloc_slow(size, align, offset) + } + + fn alloc_slow_once(&mut self, size: usize, align: usize, offset: isize) -> Address { + // Try get a block from the space + if let Some(block) = self.acquire_global_block(size, align, false) { + let addr = self.block_alloc(block); + allocator::align_allocation::(addr, align, offset) + } else { + Address::ZERO + } + } + + fn does_thread_local_allocation(&self) -> bool { + true + } + + fn get_thread_local_buffer_granularity(&self) -> usize { + Block::BYTES + } + + fn alloc_slow_once_precise_stress( + &mut self, + size: usize, + align: usize, + offset: isize, + need_poll: bool, + ) -> Address { + trace!("allow slow precise stress s={}", size); + if need_poll { + self.acquire_global_block(0, 0, true); + } + + // mimic what fastpath allocation does, except that we allocate from available_blocks_stress. 
+        if let Some(block) = self.find_free_block_stress(size, align) {
+            let cell = self.block_alloc(block);
+            allocator::align_allocation::<VM>(cell, align, offset)
+        } else {
+            Address::ZERO
+        }
+    }
+
+    fn on_mutator_destroy(&mut self) {
+        self.abandon_blocks();
+    }
+}
+
+impl<VM: VMBinding> FreeListAllocator<VM> {
+    // New free list allocator
+    pub fn new(
+        tls: VMThread,
+        space: &'static MarkSweepSpace<VM>,
+        plan: &'static dyn Plan<VM = VM>,
+    ) -> Self {
+        FreeListAllocator {
+            tls,
+            space,
+            plan,
+            available_blocks: new_empty_block_lists(),
+            available_blocks_stress: new_empty_block_lists(),
+            unswept_blocks: new_empty_block_lists(),
+            consumed_blocks: new_empty_block_lists(),
+        }
+    }
+
+    // Find a free cell within a given block
+    fn block_alloc(&mut self, block: Block) -> Address {
+        let cell = block.load_free_list();
+        if cell.is_zero() {
+            return cell; // return failed allocation
+        }
+        let next_cell = unsafe { cell.load::<Address>() };
+        // Clear the link
+        unsafe { cell.store::<Address>(Address::ZERO) };
+        debug_assert!(
+            next_cell.is_zero() || block.includes_address(next_cell),
+            "next_cell {} is not in {:?}",
+            next_cell,
+            block
+        );
+        block.store_free_list(next_cell);
+
+        // Zeroing memory right before we return it.
+        // If we move the zeroing to somewhere else, we need to clear the list link here: cell.store::<Address>
(Address::ZERO) + let cell_size = block.load_block_cell_size(); + crate::util::memory::zero(cell, cell_size); + + // Make sure the memory is zeroed. This looks silly as we zero the cell right before this check. + // But we would need to move the zeroing to somewhere so we can do zeroing at a coarser grainularity. + #[cfg(debug_assertions)] + { + let mut cursor = cell; + while cursor < cell + cell_size { + debug_assert_eq!(unsafe { cursor.load::() }, 0); + cursor += crate::util::constants::BYTES_IN_ADDRESS; + } + } + + cell + } + + // Find an available block when stress GC is enabled. This includes getting a block from the space. + fn find_free_block_stress(&mut self, size: usize, align: usize) -> Option { + Self::find_free_block_with( + &mut self.available_blocks_stress, + &mut self.consumed_blocks, + size, + align, + ) + .or_else(|| self.recycle_local_blocks(size, align, true)) + .or_else(|| self.acquire_global_block(size, align, true)) + } + + // Find an available block from local block lists + #[inline(always)] + fn find_free_block_local(&mut self, size: usize, align: usize) -> Option { + Self::find_free_block_with( + &mut self.available_blocks, + &mut self.consumed_blocks, + size, + align, + ) + .or_else(|| self.recycle_local_blocks(size, align, false)) + } + + // Find an available block + // This will usually be the first block on the available list. If all available blocks are found + // to be full, other lists are searched + // This function allows different available block lists -- normal allocation uses self.avaialble_blocks, and precise stress test uses self.avialable_blocks_stress. + #[inline(always)] + fn find_free_block_with( + available_blocks: &mut BlockLists, + consumed_blocks: &mut BlockLists, + size: usize, + align: usize, + ) -> Option { + let bin = mi_bin::(size, align); + debug_assert!(bin <= MAX_BIN); + + let available = &mut available_blocks[bin]; + debug_assert!(available.size >= size); + + if !available.is_empty() { + let mut cursor = available.first; + + while let Some(block) = cursor { + if block.has_free_cells() { + return Some(block); + } + available.pop(); + consumed_blocks.get_mut(bin).unwrap().push(block); + + cursor = available.first; + } + } + + debug_assert!(available_blocks[bin].is_empty()); + None + } + + /// Add a block to the given bin in the available block lists. Depending on which available block list we are using, this + /// method may add the block to available_blocks, or available_blocks_stress. + #[inline(always)] + fn add_to_available_blocks(&mut self, bin: usize, block: Block, stress: bool) { + if stress { + debug_assert!(self.plan.base().is_precise_stress()); + self.available_blocks_stress[bin].push(block); + } else { + self.available_blocks[bin].push(block); + } + } + + /// Tries to recycle local blocks if there is any. This is a no-op for eager sweeping mark sweep. + #[inline] + fn recycle_local_blocks( + &mut self, + size: usize, + align: usize, + _stress_test: bool, + ) -> Option { + if cfg!(feature = "eager_sweeping") { + // We have swept blocks in the last GC. If we run out of available blocks, there is nothing we can do. 
+ None + } else { + // Get blocks from unswept_blocks and attempt to sweep + loop { + let bin = mi_bin::(size, align); + debug_assert!(self.available_blocks[bin].is_empty()); // only use this function if there are no blocks available + + if let Some(block) = self.unswept_blocks.get_mut(bin).unwrap().pop() { + block.sweep::(); + if block.has_free_cells() { + // recyclable block + self.add_to_available_blocks( + bin, + block, + self.plan.base().is_stress_test_gc_enabled(), + ); + return Some(block); + } else { + // nothing was freed from this block + self.consumed_blocks.get_mut(bin).unwrap().push(block); + } + } else { + return None; + } + } + } + } + + /// Get a block from the space. + fn acquire_global_block( + &mut self, + size: usize, + align: usize, + stress_test: bool, + ) -> Option { + let bin = mi_bin::(size, align); + loop { + match self.space.acquire_block(self.tls, size, align) { + crate::policy::marksweepspace::native_ms::BlockAcquireResult::Exhausted => { + // GC + return None; + } + + crate::policy::marksweepspace::native_ms::BlockAcquireResult::Fresh(block) => { + self.add_to_available_blocks(bin, block, stress_test); + self.init_block(block, self.available_blocks[bin].size); + + return Some(block); + } + + crate::policy::marksweepspace::native_ms::BlockAcquireResult::AbandonedAvailable(block) => { + block.store_tls(self.tls); + if block.has_free_cells() { + self.add_to_available_blocks(bin, block, stress_test); + return Some(block); + } else { + self.consumed_blocks[bin].push(block); + } + } + + crate::policy::marksweepspace::native_ms::BlockAcquireResult::AbandonedUnswept(block) => { + block.store_tls(self.tls); + block.sweep::(); + if block.has_free_cells() { + self.add_to_available_blocks(bin, block, stress_test); + return Some(block); + } else { + self.consumed_blocks[bin].push(block); + } + } + } + } + } + + fn init_block(&self, block: Block, cell_size: usize) { + self.space.record_new_block(block); + + // construct free list + let block_end = block.start() + Block::BYTES; + let mut old_cell = unsafe { Address::zero() }; + let mut new_cell = block.start(); + + let final_cell = loop { + unsafe { + new_cell.store::
(old_cell); + } + old_cell = new_cell; + new_cell += cell_size; + if new_cell + cell_size > block_end { + break old_cell; + }; + }; + + block.store_free_list(final_cell); + block.store_block_cell_size(cell_size); + #[cfg(feature = "malloc_native_mimalloc")] + { + block.store_local_free_list(Address::ZERO); + block.store_thread_free_list(Address::ZERO); + } + + self.store_block_tls(block); + } + + #[cfg(feature = "malloc_native_mimalloc")] + fn free(&self, addr: Address) { + use crate::util::ObjectReference; + let block = Block::from_unaligned_address(addr); + let block_tls = block.load_tls(); + + if self.tls == block_tls { + // same thread that allocated + let local_free = block.load_local_free_list(); + unsafe { + addr.store(local_free); + } + block.store_local_free_list(addr); + } else { + // different thread to allocator + unreachable!( + "tlss don't match freeing from block {}, my tls = {:?}, block tls = {:?}", + block.start(), + self.tls, + block.load_tls() + ); + + // I am not sure whether the following code would be used to free a block for other thread. I will just keep it here as commented out. + // let mut success = false; + // while !success { + // let thread_free = FreeListAllocator::::load_thread_free_list(block); + // unsafe { + // addr.store(thread_free); + // } + // success = FreeListAllocator::::cas_thread_free_list(&self, block, thread_free, addr); + // } + } + + // unset allocation bit + unsafe { + crate::util::alloc_bit::unset_alloc_bit_unsafe::(ObjectReference::from_raw_address( + addr, + )) + }; + } + + fn store_block_tls(&self, block: Block) { + block.store_tls(self.tls); + } + + pub(crate) fn prepare(&mut self) { + // For lazy sweeping, it doesn't matter whether we do it in prepare or release. + // However, in the release phase, we will do block-level sweeping. And that will cause + // race if we also reset the allocator in release (which will mutate on the block lists). + // So we just move reset to the prepare phase. + #[cfg(not(feature = "eager_sweeping"))] + self.reset(); + } + + pub(crate) fn release(&mut self) { + // For eager sweeping, we have to do this in the release phase when we know the liveness of the blocks + #[cfg(feature = "eager_sweeping")] + self.reset(); + } + + /// Do we abandon allocator local blocks in reset? + /// We should do this for GC. Otherwise, blocks will be held by each allocator, and they cannot + /// be reused by other allocators. This is measured to cause up to 100% increase of the min heap size + /// for mark sweep. 
+ const ABANDON_BLOCKS_IN_RESET: bool = true; + + #[cfg(not(feature = "eager_sweeping"))] + fn reset(&mut self) { + trace!("reset"); + // consumed and available are now unswept + let mut bin = 0; + while bin < MAX_BIN + 1 { + let unswept = self.unswept_blocks.get_mut(bin).unwrap(); + unswept.lock(); + + let mut sweep_later = |list: &mut BlockList| { + list.lock(); + unswept.append(list); + list.unlock(); + }; + + sweep_later(&mut self.available_blocks[bin]); + sweep_later(&mut self.available_blocks_stress[bin]); + sweep_later(&mut self.consumed_blocks[bin]); + + unswept.unlock(); + bin += 1; + } + + if Self::ABANDON_BLOCKS_IN_RESET { + self.abandon_blocks(); + } + } + + #[cfg(feature = "eager_sweeping")] + fn reset(&mut self) { + debug!("reset"); + // sweep all blocks and push consumed onto available list + let mut bin = 0; + while bin < MAX_BIN + 1 { + let sweep = |first_block: Option, used_blocks: bool| { + let mut cursor = first_block; + while let Some(block) = cursor { + if used_blocks { + block.sweep::(); + cursor = block.load_next_block(); + } else { + let next = block.load_next_block(); + if !block.attempt_release(self.space) { + block.sweep::(); + } + cursor = next; + } + } + }; + + sweep(self.available_blocks[bin].first, true); + sweep(self.available_blocks_stress[bin].first, true); + + // Sweep consumed blocks, and also push the blocks back to the available list. + sweep(self.consumed_blocks[bin].first, false); + if self.plan.base().is_precise_stress() && self.plan.base().is_stress_test_gc_enabled() + { + debug_assert!(self.plan.base().is_precise_stress()); + self.available_blocks_stress[bin].append(&mut self.consumed_blocks[bin]); + } else { + self.available_blocks[bin].append(&mut self.consumed_blocks[bin]); + } + + bin += 1; + + if Self::ABANDON_BLOCKS_IN_RESET { + self.abandon_blocks(); + } + } + + if Self::ABANDON_BLOCKS_IN_RESET { + self.abandon_blocks(); + } + } + + fn abandon_blocks(&mut self) { + let mut abandoned = self.space.abandoned.lock().unwrap(); + let mut i = 0; + while i < MI_BIN_FULL { + let available = self.available_blocks.get_mut(i).unwrap(); + if !available.is_empty() { + abandoned.available[i].append(available); + } + + let available_stress = self.available_blocks_stress.get_mut(i).unwrap(); + if !available_stress.is_empty() { + abandoned.available[i].append(available_stress); + } + + let consumed = self.consumed_blocks.get_mut(i).unwrap(); + if !consumed.is_empty() { + abandoned.consumed[i].append(consumed); + } + + let unswept = self.unswept_blocks.get_mut(i).unwrap(); + if !unswept.is_empty() { + abandoned.unswept[i].append(unswept); + } + i += 1; + } + } +} diff --git a/src/util/alloc/immix_allocator.rs b/src/util/alloc/immix_allocator.rs index 19690c68f3..0cc5f4e5d0 100644 --- a/src/util/alloc/immix_allocator.rs +++ b/src/util/alloc/immix_allocator.rs @@ -81,7 +81,7 @@ impl Allocator for ImmixAllocator { "{:?}: Thread local buffer used up, go to alloc slow path", self.tls ); - if get_maximum_aligned_size::(size, align, VM::MIN_ALIGNMENT) > Line::BYTES { + if get_maximum_aligned_size::(size, align) > Line::BYTES { // Size larger than a line: do large allocation self.overflow_alloc(size, align, offset) } else { @@ -328,9 +328,7 @@ impl ImmixAllocator { // size check and then return the conditions where `alloc_slow_inline()` would be called // in an `alloc()` call, namely when both `overflow_alloc()` and `alloc_slow_hot()` fail // to service the allocation request - if insufficient_space - && get_maximum_aligned_size::(size, align, VM::MIN_ALIGNMENT) > 
Line::BYTES - { + if insufficient_space && get_maximum_aligned_size::(size, align) > Line::BYTES { let start = align_allocation_no_fill::(self.large_cursor, align, offset); let end = start + size; end > self.large_limit diff --git a/src/util/alloc/large_object_allocator.rs b/src/util/alloc/large_object_allocator.rs index bf46fc1fd7..5c6abf2b83 100644 --- a/src/util/alloc/large_object_allocator.rs +++ b/src/util/alloc/large_object_allocator.rs @@ -38,7 +38,7 @@ impl Allocator for LargeObjectAllocator { let cell: Address = self.alloc_slow(size, align, offset); // We may get a null ptr from alloc due to the VM being OOM if !cell.is_zero() { - allocator::align_allocation::(cell, align, offset, VM::MIN_ALIGNMENT, true) + allocator::align_allocation::(cell, align, offset) } else { cell } @@ -46,8 +46,7 @@ impl Allocator for LargeObjectAllocator { fn alloc_slow_once(&mut self, size: usize, align: usize, _offset: isize) -> Address { let header = 0; // HashSet is used instead of DoublyLinkedList - let maxbytes = - allocator::get_maximum_aligned_size::(size + header, align, VM::MIN_ALIGNMENT); + let maxbytes = allocator::get_maximum_aligned_size::(size + header, align); let pages = crate::util::conversions::bytes_to_pages_up(maxbytes); let sp = self.space.allocate_pages(self.tls, pages); if sp.is_zero() { diff --git a/src/util/alloc/malloc_allocator.rs b/src/util/alloc/malloc_allocator.rs index d5604a475b..b7074cd20c 100644 --- a/src/util/alloc/malloc_allocator.rs +++ b/src/util/alloc/malloc_allocator.rs @@ -1,4 +1,4 @@ -use crate::policy::mallocspace::MallocSpace; +use crate::policy::marksweepspace::malloc_ms::MallocSpace; use crate::policy::space::Space; use crate::util::alloc::Allocator; use crate::util::opaque_pointer::*; diff --git a/src/util/alloc/mod.rs b/src/util/alloc/mod.rs index 149fd2cd99..57f5dd7a41 100644 --- a/src/util/alloc/mod.rs +++ b/src/util/alloc/mod.rs @@ -14,7 +14,6 @@ pub use allocators::AllocatorSelector; mod bumpallocator; pub use bumpallocator::BumpAllocator; -/// Large object allocator mod large_object_allocator; pub use large_object_allocator::LargeObjectAllocator; @@ -26,6 +25,10 @@ pub use malloc_allocator::MallocAllocator; pub mod immix_allocator; pub use self::immix_allocator::ImmixAllocator; +// Free list allocator based on Mimalloc +pub mod free_list_allocator; +pub use free_list_allocator::FreeListAllocator; + /// Mark compact allocator (actually a bump pointer allocator with an extra heade word) mod markcompact_allocator; pub use markcompact_allocator::MarkCompactAllocator; diff --git a/src/util/alloc_bit.rs b/src/util/alloc_bit.rs index 7702620b10..8b488123cb 100644 --- a/src/util/alloc_bit.rs +++ b/src/util/alloc_bit.rs @@ -27,6 +27,11 @@ pub fn unset_alloc_bit(object: ObjectReference) { ALLOC_SIDE_METADATA_SPEC.store_atomic::(object.to_address::(), 0, Ordering::SeqCst); } +/// Atomically unset the alloc bit for an object, regardless whether the bit is set or not. +pub fn unset_alloc_bit_nocheck(object: ObjectReference) { + ALLOC_SIDE_METADATA_SPEC.store_atomic::(object.to_address::(), 0, Ordering::SeqCst); +} + /// Non-atomically unset the alloc bit for an object. The caller needs to ensure the side /// metadata for the alloc bit for the object is accessed by only one thread. 
/// diff --git a/src/util/heap/chunk_map.rs b/src/util/heap/chunk_map.rs new file mode 100644 index 0000000000..797c7339fd --- /dev/null +++ b/src/util/heap/chunk_map.rs @@ -0,0 +1,138 @@ +use crate::scheduler::GCWork; +use crate::util::linear_scan::Region; +use crate::util::linear_scan::RegionIterator; +use crate::util::metadata::side_metadata::SideMetadataSpec; +use crate::util::Address; +use crate::vm::VMBinding; +use spin::Mutex; +use std::ops::Range; + +/// Data structure to reference a MMTk 4 MB chunk. +#[repr(transparent)] +#[derive(Debug, Clone, Copy, PartialOrd, PartialEq, Eq)] +pub struct Chunk(Address); + +impl Region for Chunk { + const LOG_BYTES: usize = crate::util::heap::layout::vm_layout_constants::LOG_BYTES_IN_CHUNK; + + #[inline(always)] + fn from_aligned_address(address: Address) -> Self { + debug_assert!(address.is_aligned_to(Self::BYTES)); + Self(address) + } + + #[inline(always)] + fn start(&self) -> Address { + self.0 + } +} + +impl Chunk { + /// Chunk constant with zero address + // FIXME: We use this as an empty value. What if we actually use the first chunk? + pub const ZERO: Self = Self(Address::ZERO); + + /// Get an iterator for regions within this chunk. + #[inline(always)] + pub fn iter_region(&self) -> RegionIterator { + // R should be smaller than a chunk + debug_assert!(R::LOG_BYTES < Self::LOG_BYTES); + // R should be aligned to chunk boundary + debug_assert!(R::is_aligned(self.start())); + debug_assert!(R::is_aligned(self.end())); + + let start = R::from_aligned_address(self.start()); + let end = R::from_aligned_address(self.end()); + RegionIterator::::new(start, end) + } +} + +/// Chunk allocation state +#[repr(u8)] +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum ChunkState { + /// The chunk is not allocated. + Free = 0, + /// The chunk is allocated. + Allocated = 1, +} + +/// A byte-map to record all the allocated chunks. +/// A plan can use this to maintain records for the chunks that they used, and the states of the chunks. +/// Any plan that uses the chunk map should include the `ALLOC_TABLE` spec in their local sidemetadata specs +pub struct ChunkMap { + chunk_range: Mutex>, +} + +impl ChunkMap { + /// Chunk alloc table + pub const ALLOC_TABLE: SideMetadataSpec = + crate::util::metadata::side_metadata::spec_defs::CHUNK_MARK; + + pub fn new() -> Self { + Self { + chunk_range: Mutex::new(Chunk::ZERO..Chunk::ZERO), + } + } + + /// Set chunk state + pub fn set(&self, chunk: Chunk, state: ChunkState) { + // Do nothing if the chunk is already in the expected state. + if self.get(chunk) == state { + return; + } + // Update alloc byte + unsafe { Self::ALLOC_TABLE.store::(chunk.start(), state as u8) }; + // If this is a newly allcoated chunk, then expand the chunk range. + if state == ChunkState::Allocated { + debug_assert!(!chunk.start().is_zero()); + let mut range = self.chunk_range.lock(); + if range.start == Chunk::ZERO { + // FIXME: what if we actually use the first chunk? + range.start = chunk; + range.end = chunk.next(); + } else if chunk < range.start { + range.start = chunk; + } else if range.end <= chunk { + range.end = chunk.next(); + } + } + } + + /// Get chunk state + pub fn get(&self, chunk: Chunk) -> ChunkState { + let byte = unsafe { Self::ALLOC_TABLE.load::(chunk.start()) }; + match byte { + 0 => ChunkState::Free, + 1 => ChunkState::Allocated, + _ => unreachable!(), + } + } + + /// A range of all chunks in the heap. 
+ pub fn all_chunks(&self) -> RegionIterator { + let chunk_range = self.chunk_range.lock(); + RegionIterator::::new(chunk_range.start, chunk_range.end) + } + + /// Helper function to create per-chunk processing work packets for each allocated chunks. + pub fn generate_tasks( + &self, + func: impl Fn(Chunk) -> Box>, + ) -> Vec>> { + let mut work_packets: Vec>> = vec![]; + for chunk in self + .all_chunks() + .filter(|c| self.get(*c) == ChunkState::Allocated) + { + work_packets.push(func(chunk)); + } + work_packets + } +} + +impl Default for ChunkMap { + fn default() -> Self { + Self::new() + } +} diff --git a/src/util/heap/mod.rs b/src/util/heap/mod.rs index 6a3c8c174d..4f39b65ac4 100644 --- a/src/util/heap/mod.rs +++ b/src/util/heap/mod.rs @@ -2,6 +2,7 @@ mod accounting; #[macro_use] pub mod layout; pub mod blockpageresource; +pub mod chunk_map; pub mod freelistpageresource; mod heap_meta; pub mod monotonepageresource; diff --git a/src/util/linear_scan.rs b/src/util/linear_scan.rs index 7df40ce7dd..65dfc4b45f 100644 --- a/src/util/linear_scan.rs +++ b/src/util/linear_scan.rs @@ -123,6 +123,11 @@ pub trait Region: Copy + PartialEq + PartialOrd { fn containing(object: ObjectReference) -> Self { Self::from_unaligned_address(object.to_address::()) } + /// Check if the given address is in the region. + #[inline(always)] + fn includes_address(&self, addr: Address) -> bool { + Self::align(addr) == self.start() + } } pub struct RegionIterator { diff --git a/src/util/metadata/mark_bit.rs b/src/util/metadata/mark_bit.rs new file mode 100644 index 0000000000..ee8dd9b2f3 --- /dev/null +++ b/src/util/metadata/mark_bit.rs @@ -0,0 +1,16 @@ +use crate::util::ObjectReference; +use crate::vm::VMBinding; +use crate::vm::VMLocalMarkBitSpec; +use std::sync::atomic::Ordering; + +impl VMLocalMarkBitSpec { + /// Set the mark bit for the object to 1 + pub fn mark(&self, object: ObjectReference, ordering: Ordering) { + self.store_atomic::(object, 1, None, ordering); + } + + /// Test if the mark bit for the object is set (1) + pub fn is_marked(&self, object: ObjectReference, ordering: Ordering) -> bool { + self.load_atomic::(object, None, ordering) == 1 + } +} diff --git a/src/util/metadata/metadata_val_traits.rs b/src/util/metadata/metadata_val_traits.rs index a89404fd2e..943df61b9b 100644 --- a/src/util/metadata/metadata_val_traits.rs +++ b/src/util/metadata/metadata_val_traits.rs @@ -1,7 +1,7 @@ use crate::util::Address; use core::sync::atomic::*; use num_traits::{FromPrimitive, ToPrimitive}; -use num_traits::{Unsigned, WrappingAdd, WrappingSub}; +use num_traits::{Unsigned, WrappingAdd, WrappingSub, Zero}; /// Describes bits and log2 bits for the numbers. /// If num_traits has this, we do not need our own implementation: https://github.com/rust-num/num-traits/issues/247 @@ -60,6 +60,7 @@ impl_bitwise_ops_trait!(usize); /// The methods in this trait are mostly about atomically accessing such types. 
pub trait MetadataValue: Unsigned + + Zero + WrappingAdd + WrappingSub + Bits diff --git a/src/util/metadata/mod.rs b/src/util/metadata/mod.rs index 23d1150885..32eac26ac6 100644 --- a/src/util/metadata/mod.rs +++ b/src/util/metadata/mod.rs @@ -225,5 +225,6 @@ pub mod side_metadata; pub use metadata_val_traits::*; pub(crate) mod log_bit; +pub(crate) mod mark_bit; pub use global::*; diff --git a/src/util/metadata/side_metadata/global.rs b/src/util/metadata/side_metadata/global.rs index 410a930542..a895ae4259 100644 --- a/src/util/metadata/side_metadata/global.rs +++ b/src/util/metadata/side_metadata/global.rs @@ -106,9 +106,10 @@ impl SideMetadataSpec { pub(crate) fn assert_metadata_mapped(&self, data_addr: Address) { let meta_start = address_to_meta_address(self, data_addr).align_down(BYTES_IN_PAGE); - debug!( + trace!( "ensure_metadata_is_mapped({}).meta_start({})", - data_addr, meta_start + data_addr, + meta_start ); memory::panic_if_unmapped(meta_start, BYTES_IN_PAGE); @@ -449,6 +450,41 @@ impl SideMetadataSpec { ) } + /// Non-atomically store zero to the side metadata for the given address. + /// This method mainly facilitates clearing multiple metadata specs for the same address in a loop. + /// + /// # Safety + /// + /// This is unsafe because: + /// + /// 1. Concurrent access to this operation is undefined behaviour. + /// 2. Interleaving Non-atomic and atomic operations is undefined behaviour. + #[inline(always)] + pub unsafe fn set_zero(&self, data_addr: Address) { + use num_traits::Zero; + match self.log_num_of_bits { + 0..=3 => self.store(data_addr, u8::zero()), + 4 => self.store(data_addr, u16::zero()), + 5 => self.store(data_addr, u32::zero()), + 6 => self.store(data_addr, u64::zero()), + _ => unreachable!(), + } + } + + /// Atomiccally store zero to the side metadata for the given address. + /// This method mainly facilitates clearing multiple metadata specs for the same address in a loop. 
+ #[inline(always)] + pub fn set_zero_atomic(&self, data_addr: Address, order: Ordering) { + use num_traits::Zero; + match self.log_num_of_bits { + 0..=3 => self.store_atomic(data_addr, u8::zero(), order), + 4 => self.store_atomic(data_addr, u16::zero(), order), + 5 => self.store_atomic(data_addr, u32::zero(), order), + 6 => self.store_atomic(data_addr, u64::zero(), order), + _ => unreachable!(), + } + } + #[inline(always)] pub fn compare_exchange_atomic( &self, diff --git a/src/util/metadata/side_metadata/spec_defs.rs b/src/util/metadata/side_metadata/spec_defs.rs index 1d28c3bdde..a3a9dc1818 100644 --- a/src/util/metadata/side_metadata/spec_defs.rs +++ b/src/util/metadata/side_metadata/spec_defs.rs @@ -75,8 +75,27 @@ define_side_metadata_specs!( IX_BLOCK_DEFRAG = (global: false, log_num_of_bits: 3, log_bytes_in_region: crate::policy::immix::block::Block::LOG_BYTES), // Mark blocks by immix IX_BLOCK_MARK = (global: false, log_num_of_bits: 3, log_bytes_in_region: crate::policy::immix::block::Block::LOG_BYTES), - // Mark chunks by immix - IX_CHUNK_MARK = (global: false, log_num_of_bits: 3, log_bytes_in_region: crate::policy::immix::chunk::Chunk::LOG_BYTES), + // Mark chunks (any plan that uses the chunk map should include this spec in their local sidemetadata specs) + CHUNK_MARK = (global: false, log_num_of_bits: 3, log_bytes_in_region: crate::util::heap::chunk_map::Chunk::LOG_BYTES), + // Mark blocks by (native mimalloc) marksweep + MS_BLOCK_MARK = (global: false, log_num_of_bits: 3, log_bytes_in_region: crate::policy::marksweepspace::native_ms::Block::LOG_BYTES), + // Next block in list for native mimalloc + MS_BLOCK_NEXT = (global: false, log_num_of_bits: LOG_BITS_IN_ADDRESS, log_bytes_in_region: crate::policy::marksweepspace::native_ms::Block::LOG_BYTES), + // Previous block in list for native mimalloc + MS_BLOCK_PREV = (global: false, log_num_of_bits: LOG_BITS_IN_ADDRESS, log_bytes_in_region: crate::policy::marksweepspace::native_ms::Block::LOG_BYTES), + // Pointer to owning list for blocks for native mimalloc + MS_BLOCK_LIST = (global: false, log_num_of_bits: LOG_BITS_IN_ADDRESS, log_bytes_in_region: crate::policy::marksweepspace::native_ms::Block::LOG_BYTES), + // Size of cells in block for native mimalloc FIXME: do we actually need usize? 
+ MS_BLOCK_SIZE = (global: false, log_num_of_bits: LOG_BITS_IN_ADDRESS, log_bytes_in_region: crate::policy::marksweepspace::native_ms::Block::LOG_BYTES), + // TLS of owning mutator of block for native mimalloc + MS_BLOCK_TLS = (global: false, log_num_of_bits: LOG_BITS_IN_ADDRESS, log_bytes_in_region: crate::policy::marksweepspace::native_ms::Block::LOG_BYTES), + // First cell of free list in block for native mimalloc + MS_FREE = (global: false, log_num_of_bits: LOG_BITS_IN_ADDRESS, log_bytes_in_region: crate::policy::marksweepspace::native_ms::Block::LOG_BYTES), + // The following specs are only used for manual malloc/free + // First cell of local free list in block for native mimalloc + MS_LOCAL_FREE = (global: false, log_num_of_bits: LOG_BITS_IN_ADDRESS, log_bytes_in_region: crate::policy::marksweepspace::native_ms::Block::LOG_BYTES), + // First cell of thread free list in block for native mimalloc + MS_THREAD_FREE = (global: false, log_num_of_bits: LOG_BITS_IN_ADDRESS, log_bytes_in_region: crate::policy::marksweepspace::native_ms::Block::LOG_BYTES), ); #[cfg(test)] diff --git a/src/util/statistics/stats.rs b/src/util/statistics/stats.rs index 6cf21985ff..61a2a05172 100644 --- a/src/util/statistics/stats.rs +++ b/src/util/statistics/stats.rs @@ -11,6 +11,8 @@ use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::Arc; use std::sync::Mutex; +// TODO: Increasing this number would cause JikesRVM die at boot time. I don't really know why. +// E.g. using 1 << 14 will cause JikesRVM segfault at boot time. pub const MAX_PHASES: usize = 1 << 12; pub const MAX_COUNTERS: usize = 100; diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 4bccea5f94..ff47f3d3b7 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -35,6 +35,9 @@ pub use self::scanning::ObjectTracer; pub use self::scanning::RootsWorkFactory; pub use self::scanning::Scanning; +const DEFAULT_LOG_MIN_ALIGNMENT: usize = LOG_BYTES_IN_INT as usize; +const DEFAULT_LOG_MAX_ALIGNMENT: usize = LOG_BYTES_IN_LONG as usize; + /// The `VMBinding` trait associates with each trait, and provides VM-specific constants. pub trait VMBinding where @@ -53,15 +56,14 @@ where /// A value to fill in alignment gaps. This value can be used for debugging. const ALIGNMENT_VALUE: usize = 0xdead_beef; - /// Allowed minimal alignment. - const LOG_MIN_ALIGNMENT: usize = LOG_BYTES_IN_INT as usize; /// Allowed minimal alignment in bytes. - const MIN_ALIGNMENT: usize = 1 << Self::LOG_MIN_ALIGNMENT; - /// Allowed maximum alignment as shift by min alignment. - const MAX_ALIGNMENT_SHIFT: usize = LOG_BYTES_IN_LONG as usize - LOG_BYTES_IN_INT as usize; - + const MIN_ALIGNMENT: usize = 1 << DEFAULT_LOG_MIN_ALIGNMENT; /// Allowed maximum alignment in bytes. - const MAX_ALIGNMENT: usize = Self::MIN_ALIGNMENT << Self::MAX_ALIGNMENT_SHIFT; + const MAX_ALIGNMENT: usize = 1 << DEFAULT_LOG_MAX_ALIGNMENT; + /// Does the binding use a non-zero allocation offset? If this is false, we expect the binding + /// to always use offset === 0 for allocation, and we are able to do some optimization if we know + /// offset === 0. + const USE_ALLOCATION_OFFSET: bool = true; /// This value is used to assert if the cursor is reasonable after allocations. /// At the end of an allocation, the allocation cursor should be aligned to this value. diff --git a/src/vm/object_model.rs b/src/vm/object_model.rs index a7f535d24d..bb10f145c2 100644 --- a/src/vm/object_model.rs +++ b/src/vm/object_model.rs @@ -379,6 +379,24 @@ pub trait ObjectModel { /// mature space for generational plans. 
diff --git a/src/vm/object_model.rs b/src/vm/object_model.rs
index a7f535d24d..bb10f145c2 100644
--- a/src/vm/object_model.rs
+++ b/src/vm/object_model.rs
@@ -379,6 +379,24 @@ pub trait ObjectModel<VM: VMBinding> {
     /// mature space for generational plans.
     const VM_WORST_CASE_COPY_EXPANSION: f64 = 1.5;
 
+    /// If this is true, the binding guarantees that an object reference's raw address is always equal to the return value of the `ref_to_address` method
+    /// and the return value of the `ref_to_object_start` method. This is a very strong guarantee, but it also allows MMTk to
+    /// make some assumptions and optimize for this case.
+    /// If a binding sets this to true and the related methods return inconsistent results, the behavior is undefined. MMTk may panic
+    /// if an assertion catches the inconsistency, but it may also fail silently.
+    const UNIFIED_OBJECT_REFERENCE_ADDRESS: bool = false;
+
+    /// A binding may have an offset between its allocation result (object_start) and
+    /// the raw address of its object reference, i.e. object ref's raw address = object_start + offset.
+    /// The offset could be zero, and it does not need to be
+    /// constant across objects. This constant defines the smallest possible offset.
+    ///
+    /// MMTk uses this as an indication of where object references may point to in some algorithms.
+    ///
+    /// We expect the invariant:
+    /// * object ref >= object_start + OBJECT_REF_OFFSET_LOWER_BOUND
+    const OBJECT_REF_OFFSET_LOWER_BOUND: isize;
+
     /// Return the lowest address of the storage associated with an object. This should be
     /// the address that a binding gets by an allocation call ([`crate::memory_manager::alloc`]).
     ///
@@ -400,12 +418,10 @@ pub trait ObjectModel<VM: VMBinding> {
     /// for an given object. For a given object, the returned address
     /// should be a constant offset from the object reference address.
     ///
-    /// If a binding enables the `is_mmtk_object` feature, MMTk may forge the queried address
+    /// Note that MMTk may forge an arbitrary address
     /// directly into a potential object reference, and call this method on the 'object reference'.
     /// In that case, the argument `object` may not be a valid object reference,
     /// and the implementation of this method should not use any object metadata.
-    /// However, if a binding, does not use the`is_mmtk_object` feature, they can expect
-    /// the `object` to be valid.
     ///
     /// MMTk uses this method more frequently than [`crate::vm::ObjectModel::ref_to_object_start`].
     ///
diff --git a/vmbindings/dummyvm/Cargo.toml b/vmbindings/dummyvm/Cargo.toml
index 20be91eded..e0e790f505 100644
--- a/vmbindings/dummyvm/Cargo.toml
+++ b/vmbindings/dummyvm/Cargo.toml
@@ -18,6 +18,7 @@ libc = "0.2"
 lazy_static = "1.1"
 atomic_refcell = "0.1.7"
 atomic = "0.4.6"
+log = "0.4"
 
 [features]
 default = []
diff --git a/vmbindings/dummyvm/src/api.rs b/vmbindings/dummyvm/src/api.rs
index 8b0ec106ff..636582db49 100644
--- a/vmbindings/dummyvm/src/api.rs
+++ b/vmbindings/dummyvm/src/api.rs
@@ -36,7 +36,10 @@ pub extern "C" fn mmtk_bind_mutator(tls: VMMutatorThread) -> *mut Mutator<DummyVM> {
 
 #[no_mangle]
 pub extern "C" fn mmtk_destroy_mutator(mutator: *mut Mutator<DummyVM>) {
-    memory_manager::destroy_mutator(unsafe { Box::from_raw(mutator) })
+    // notify mmtk-core about the destroyed mutator
+    memory_manager::destroy_mutator(unsafe { &mut *mutator });
+    // turn the pointer back into a box, and let Rust reclaim it properly
+    let _ = unsafe { Box::from_raw(mutator) };
 }
 
 #[no_mangle]
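The `OBJECT_REF_OFFSET_LOWER_BOUND` invariant introduced above in `object_model.rs` can be written as a one-line predicate. The sketch below is purely illustrative: it works over plain numeric addresses rather than MMTk's `Address` type, and the function name and the concrete values are made up.

```rust
/// object ref >= object_start + OBJECT_REF_OFFSET_LOWER_BOUND
fn satisfies_lower_bound(object_ref_addr: usize, object_start: usize, lower_bound: isize) -> bool {
    object_ref_addr as isize >= object_start as isize + lower_bound
}

fn main() {
    // An object reference 8 bytes past the allocation start satisfies a lower bound of 8.
    assert!(satisfies_lower_bound(0x1008, 0x1000, 8));
    // It would not satisfy a larger declared lower bound.
    assert!(!satisfies_lower_bound(0x1008, 0x1000, 16));
}
```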
diff --git a/vmbindings/dummyvm/src/lib.rs b/vmbindings/dummyvm/src/lib.rs
index df163c1ac7..ab13607e24 100644
--- a/vmbindings/dummyvm/src/lib.rs
+++ b/vmbindings/dummyvm/src/lib.rs
@@ -30,11 +30,8 @@ impl VMBinding for DummyVM {
     type VMEdge = edges::DummyVMEdge;
     type VMMemorySlice = edges::DummyVMMemorySlice;
 
-    /// Allowed maximum alignment as shift by min alignment.
-    const MAX_ALIGNMENT_SHIFT: usize = 6_usize - Self::LOG_MIN_ALIGNMENT as usize;
-    /// Allowed maximum alignment in bytes.
-    const MAX_ALIGNMENT: usize = Self::MIN_ALIGNMENT << Self::MAX_ALIGNMENT_SHIFT;
+    const MAX_ALIGNMENT: usize = 1 << 6;
 }
 
 use std::sync::atomic::{AtomicBool, Ordering};
 
diff --git a/vmbindings/dummyvm/src/object_model.rs b/vmbindings/dummyvm/src/object_model.rs
index aff3545601..57e9484e6f 100644
--- a/vmbindings/dummyvm/src/object_model.rs
+++ b/vmbindings/dummyvm/src/object_model.rs
@@ -16,6 +16,8 @@ impl ObjectModel<DummyVM> for VMObjectModel {
     const LOCAL_MARK_BIT_SPEC: VMLocalMarkBitSpec = VMLocalMarkBitSpec::in_header(0);
     const LOCAL_LOS_MARK_NURSERY_SPEC: VMLocalLOSMarkNurserySpec = VMLocalLOSMarkNurserySpec::in_header(0);
 
+    const OBJECT_REF_OFFSET_LOWER_BOUND: isize = OBJECT_REF_OFFSET as isize;
+
     fn copy(
         _from: ObjectReference,
         _semantics: CopySemantics,
diff --git a/vmbindings/dummyvm/src/tests/allocate_align_offset.rs b/vmbindings/dummyvm/src/tests/allocate_align_offset.rs
new file mode 100644
index 0000000000..afa0081bfc
--- /dev/null
+++ b/vmbindings/dummyvm/src/tests/allocate_align_offset.rs
@@ -0,0 +1,45 @@
+// GITHUB-CI: MMTK_PLAN=all
+
+use crate::api;
+use crate::DummyVM;
+use crate::tests::fixtures::{Fixture, MutatorFixture};
+use mmtk::plan::AllocationSemantics;
+use mmtk::vm::VMBinding;
+use log::info;
+
+lazy_static! {
+    static ref MUTATOR: Fixture<MutatorFixture> = Fixture::new();
+}
+
+#[test]
+pub fn allocate_alignment() {
+    MUTATOR.with_fixture(|fixture| {
+        let min = DummyVM::MIN_ALIGNMENT;
+        let max = DummyVM::MAX_ALIGNMENT;
+        info!("Allowed alignment between {} and {}", min, max);
+        let mut align = min;
+        while align <= max {
+            info!("Test allocation with alignment {}", align);
+            let addr = api::mmtk_alloc(fixture.mutator, 8, align, 0, AllocationSemantics::Default);
+            assert!(addr.is_aligned_to(align), "Expected allocation alignment {}, returned address is {:?}", align, addr);
+            align *= 2;
+        }
+    })
+}
+
+#[test]
+pub fn allocate_offset() {
+    MUTATOR.with_fixture(|fixture| {
+        const OFFSET: isize = 4;
+        let min = DummyVM::MIN_ALIGNMENT;
+        let max = DummyVM::MAX_ALIGNMENT;
+        info!("Allowed alignment between {} and {}", min, max);
+        let mut align = min;
+        while align <= max {
+            info!("Test allocation with alignment {} and offset {}", align, OFFSET);
+            let addr = api::mmtk_alloc(fixture.mutator, 8, align, OFFSET, AllocationSemantics::Default);
+            assert!((addr + OFFSET).is_aligned_to(align), "Expected allocation alignment {}, returned address is {:?}", align, addr);
+            align *= 2;
+        }
+    })
+}
diff --git a/vmbindings/dummyvm/src/tests/fixtures/mod.rs b/vmbindings/dummyvm/src/tests/fixtures/mod.rs
index 9f95eef12c..6debb06a8a 100644
--- a/vmbindings/dummyvm/src/tests/fixtures/mod.rs
+++ b/vmbindings/dummyvm/src/tests/fixtures/mod.rs
@@ -140,3 +140,27 @@ impl FixtureContent for TwoObjects {
         TwoObjects { objref1, objref2 }
     }
 }
+
+use mmtk::plan::Mutator;
+
+pub struct MutatorFixture {
+    pub mmtk: &'static MMTK<DummyVM>,
+    pub mutator: *mut Mutator<DummyVM>,
+}
+
+impl FixtureContent for MutatorFixture {
+    fn create() -> Self {
+        const MB: usize = 1024 * 1024;
+        // 1MB heap
+        mmtk_init(MB);
+        mmtk_initialize_collection(VMThread::UNINITIALIZED);
+        // Make sure GC does not run during the test.
+        mmtk_disable_collection();
+        let handle = mmtk_bind_mutator(VMMutatorThread(VMThread::UNINITIALIZED));
+
+        MutatorFixture {
+            mmtk: &crate::SINGLETON,
+            mutator: handle,
+        }
+    }
+}
diff --git a/vmbindings/dummyvm/src/tests/mod.rs b/vmbindings/dummyvm/src/tests/mod.rs
index 353156d856..fd94fe8d4a 100644
--- a/vmbindings/dummyvm/src/tests/mod.rs
+++ b/vmbindings/dummyvm/src/tests/mod.rs
@@ -7,6 +7,7 @@ mod issue139;
 mod handle_mmap_oom;
 mod handle_mmap_conflict;
+mod allocate_align_offset;
 mod allocate_without_initialize_collection;
 mod allocate_with_initialize_collection;
 mod allocate_with_disable_collection;
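Further allocation tests can reuse `MutatorFixture` in the same way. Below is a hypothetical extra test as a sketch only: the file, the test name, and the 16-byte request size are made up, and it would need its own `mod` line in `tests/mod.rs` like the one added above.

```rust
// GITHUB-CI: MMTK_PLAN=all

use crate::api;
use crate::DummyVM;
use crate::tests::fixtures::{Fixture, MutatorFixture};
use mmtk::plan::AllocationSemantics;
use mmtk::vm::VMBinding;

lazy_static! {
    static ref MUTATOR: Fixture<MutatorFixture> = Fixture::new();
}

#[test]
pub fn allocate_max_alignment_smoke() {
    MUTATOR.with_fixture(|fixture| {
        // A single allocation at the largest allowed alignment should come back aligned.
        let addr = api::mmtk_alloc(
            fixture.mutator,
            16,
            DummyVM::MAX_ALIGNMENT,
            0,
            AllocationSemantics::Default,
        );
        assert!(addr.is_aligned_to(DummyVM::MAX_ALIGNMENT));
    })
}
```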