Skip to content

Commit

Permalink
rust: block: introduce kernel::block::mq module
Browse files Browse the repository at this point in the history
Add initial abstractions for working with blk-mq.

This patch is a maintained, refactored subset of code originally published
by Wedson Almeida Filho <wedsonaf@gmail.com> [1].

[1] https://github.com/wedsonaf/linux/tree/f2cfd2fe0e2ca4e90994f96afe268bbd4382a891/rust/kernel/blk/mq.rs

Cc: Wedson Almeida Filho <wedsonaf@gmail.com>
Signed-off-by: Andreas Hindborg <a.hindborg@samsung.com>
Reviewed-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/r/20240611114551.228679-2-nmi@metaspace.dk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
  • Loading branch information
metaspace authored and axboe committed Jun 14, 2024
1 parent c2670cf commit 3253aba
Show file tree
Hide file tree
Showing 11 changed files with 984 additions and 0 deletions.
3 changes: 3 additions & 0 deletions rust/bindings/bindings_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
*/

#include <kunit/test.h>
#include <linux/blk_types.h>
#include <linux/blk-mq.h>
#include <linux/errname.h>
#include <linux/ethtool.h>
#include <linux/jiffies.h>
Expand All @@ -20,6 +22,7 @@

/* `bindgen` gets confused at certain things. */
const size_t RUST_CONST_HELPER_ARCH_SLAB_MINALIGN = ARCH_SLAB_MINALIGN;
const size_t RUST_CONST_HELPER_PAGE_SIZE = PAGE_SIZE;
const gfp_t RUST_CONST_HELPER_GFP_ATOMIC = GFP_ATOMIC;
const gfp_t RUST_CONST_HELPER_GFP_KERNEL = GFP_KERNEL;
const gfp_t RUST_CONST_HELPER_GFP_KERNEL_ACCOUNT = GFP_KERNEL_ACCOUNT;
Expand Down
16 changes: 16 additions & 0 deletions rust/helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -186,3 +186,19 @@ static_assert(
__alignof__(size_t) == __alignof__(uintptr_t),
"Rust code expects C `size_t` to match Rust `usize`"
);

// This will soon be moved to a separate file, so no need to merge with above.
#include <linux/blk-mq.h>
#include <linux/blkdev.h>

/*
 * Exported (GPL-only) wrapper that hands @rq to blk_mq_rq_to_pdu() and
 * returns whatever that call yields, unchanged.
 */
void *rust_helper_blk_mq_rq_to_pdu(struct request *rq)
{
	void *pdu = blk_mq_rq_to_pdu(rq);

	return pdu;
}
EXPORT_SYMBOL_GPL(rust_helper_blk_mq_rq_to_pdu);

/*
 * Exported (GPL-only) wrapper that hands @pdu to blk_mq_rq_from_pdu() and
 * returns the resulting request pointer, unchanged.
 */
struct request *rust_helper_blk_mq_rq_from_pdu(void *pdu)
{
	struct request *rq = blk_mq_rq_from_pdu(pdu);

	return rq;
}
EXPORT_SYMBOL_GPL(rust_helper_blk_mq_rq_from_pdu);
5 changes: 5 additions & 0 deletions rust/kernel/block.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// SPDX-License-Identifier: GPL-2.0

//! Types for working with the block layer.
pub mod mq;
98 changes: 98 additions & 0 deletions rust/kernel/block/mq.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// SPDX-License-Identifier: GPL-2.0

//! This module provides types for implementing block drivers that interface
//! with the blk-mq subsystem.
//!
//! To implement a block device driver, a Rust module must do the following:
//!
//! - Implement [`Operations`] for a type `T`.
//! - Create a [`TagSet<T>`].
//! - Create a [`GenDisk<T>`], via the [`GenDiskBuilder`].
//! - Add the disk to the system by calling [`GenDiskBuilder::build`] passing in
//! the `TagSet` reference.
//!
//! The types available in this module that have direct C counterparts are:
//!
//! - The [`TagSet`] type that abstracts the C type `struct blk_mq_tag_set`.
//! - The [`GenDisk`] type that abstracts the C type `struct gendisk`.
//! - The [`Request`] type that abstracts the C type `struct request`.
//!
//! The kernel will interface with the block device driver by calling the method
//! implementations of the `Operations` trait.
//!
//! IO requests are passed to the driver as [`kernel::types::ARef<Request>`]
//! instances. The `Request` type is a wrapper around the C `struct request`.
//! The driver must mark end of processing by calling one of the
//! `Request::end` methods. Failure to do so can lead to deadlock or timeout
//! errors. Please note that the C function `blk_mq_start_request` is implicitly
//! called when the request is queued with the driver.
//!
//! The `TagSet` is responsible for creating and maintaining a mapping between
//! `Request`s and integer ids as well as carrying a pointer to the vtable
//! generated by `Operations`. This mapping is useful for associating
//! completions from hardware with the correct `Request` instance. The `TagSet`
//! determines the maximum queue depth by setting the number of `Request`
//! instances available to the driver, and it determines the number of queues to
//! instantiate for the driver. If possible, a driver should allocate one queue
//! per core, to keep queue data local to a core.
//!
//! One `TagSet` instance can be shared between multiple `GenDisk` instances.
//! This can be useful when implementing drivers where one piece of hardware
//! with one set of IO resources is represented to the user as multiple disks.
//!
//! One significant difference between block device drivers implemented with
//! these Rust abstractions and drivers implemented in C, is that the Rust
//! drivers have to own a reference count on the `Request` type when the IO is
//! in flight. This is to ensure that the C `struct request` instances backing
//! the Rust `Request` instances are live while the Rust driver holds a
//! reference to the `Request`. In addition, the conversion of an integer tag to
//! a `Request` via the `TagSet` would not be sound without this bookkeeping.
//!
//! [`GenDisk`]: gen_disk::GenDisk
//! [`GenDisk<T>`]: gen_disk::GenDisk
//! [`GenDiskBuilder`]: gen_disk::GenDiskBuilder
//! [`GenDiskBuilder::build`]: gen_disk::GenDiskBuilder::build
//!
//! # Example
//!
//! ```rust
//! use kernel::{
//! alloc::flags,
//! block::mq::*,
//! new_mutex,
//! prelude::*,
//! sync::{Arc, Mutex},
//! types::{ARef, ForeignOwnable},
//! };
//!
//! struct MyBlkDevice;
//!
//! #[vtable]
//! impl Operations for MyBlkDevice {
//!
//! fn queue_rq(rq: ARef<Request<Self>>, _is_last: bool) -> Result {
//! Request::end_ok(rq);
//! Ok(())
//! }
//!
//! fn commit_rqs() {}
//! }
//!
//! let tagset: Arc<TagSet<MyBlkDevice>> =
//! Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?;
//! let mut disk = gen_disk::GenDiskBuilder::new()
//! .capacity_sectors(4096)
//! .build(format_args!("myblk"), tagset)?;
//!
//! # Ok::<(), kernel::error::Error>(())
//! ```
pub mod gen_disk;
mod operations;
mod raw_writer;
mod request;
mod tag_set;

pub use operations::Operations;
pub use request::Request;
pub use tag_set::TagSet;
215 changes: 215 additions & 0 deletions rust/kernel/block/mq/gen_disk.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
// SPDX-License-Identifier: GPL-2.0

//! Generic disk abstraction.
//!
//! C header: [`include/linux/blkdev.h`](srctree/include/linux/blkdev.h)
//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h)
use crate::block::mq::{raw_writer::RawWriter, Operations, TagSet};
use crate::error;
use crate::{bindings, error::from_err_ptr, error::Result, sync::Arc};
use core::fmt::{self, Write};

/// A builder for [`GenDisk`].
///
/// Use this struct to configure and add new [`GenDisk`] to the VFS.
///
/// Every setting has a default (see the `Default` implementation), so only the
/// attributes that differ from the defaults need to be set before calling
/// `build`.
pub struct GenDiskBuilder {
/// Whether the device is rotational media. When `false`, `build` sets
/// `QUEUE_FLAG_NONROT` on the request queue; when `true`, it clears it.
rotational: bool,
/// Logical block size in bytes, handed to `blk_queue_logical_block_size`.
logical_block_size: u32,
/// Physical block size in bytes, handed to `blk_queue_physical_block_size`.
physical_block_size: u32,
/// Device capacity in 512 byte sectors, handed to `set_capacity`.
capacity_sectors: u64,
}

impl Default for GenDiskBuilder {
    /// Produce the baseline configuration: a non-rotational device with zero
    /// capacity whose logical and physical block sizes both equal `PAGE_SIZE`.
    fn default() -> Self {
        // Both block sizes start out as exactly one page.
        let page_size = bindings::PAGE_SIZE as u32;

        Self {
            capacity_sectors: 0,
            physical_block_size: page_size,
            logical_block_size: page_size,
            rotational: false,
        }
    }
}

impl GenDiskBuilder {
/// Create a new instance.
pub fn new() -> Self {
Self::default()
}

/// Set the rotational media attribute for the device to be built.
pub fn rotational(mut self, rotational: bool) -> Self {
self.rotational = rotational;
self
}

/// Validate block size by verifying that it is between 512 and `PAGE_SIZE`,
/// and that it is a power of two.
fn validate_block_size(size: u32) -> Result<()> {
if !(512..=bindings::PAGE_SIZE as u32).contains(&size) || !size.is_power_of_two() {
Err(error::code::EINVAL)
} else {
Ok(())
}
}

/// Set the logical block size of the device to be built.
///
/// This method will check that block size is a power of two and between 512
/// and 4096. If not, an error is returned and the block size is not set.
///
/// This is the smallest unit the storage device can address. It is
/// typically 4096 bytes.
pub fn logical_block_size(mut self, block_size: u32) -> Result<Self> {
Self::validate_block_size(block_size)?;
self.logical_block_size = block_size;
Ok(self)
}

/// Set the physical block size of the device to be built.
///
/// This method will check that block size is a power of two and between 512
/// and 4096. If not, an error is returned and the block size is not set.
///
/// This is the smallest unit a physical storage device can write
/// atomically. It is usually the same as the logical block size but may be
/// bigger. One example is SATA drives with 4096 byte physical block size
/// that expose a 512 byte logical block size to the operating system.
pub fn physical_block_size(mut self, block_size: u32) -> Result<Self> {
Self::validate_block_size(block_size)?;
self.physical_block_size = block_size;
Ok(self)
}

/// Set the capacity of the device to be built, in sectors (512 bytes).
pub fn capacity_sectors(mut self, capacity: u64) -> Self {
self.capacity_sectors = capacity;
self
}

/// Build a new `GenDisk` and add it to the VFS.
pub fn build<T: Operations>(
self,
name: fmt::Arguments<'_>,
tagset: Arc<TagSet<T>>,
) -> Result<GenDisk<T>> {
let lock_class_key = crate::sync::LockClassKey::new();

// SAFETY: `tagset.raw_tag_set()` points to a valid and initialized tag set
let gendisk = from_err_ptr(unsafe {
bindings::__blk_mq_alloc_disk(
tagset.raw_tag_set(),
core::ptr::null_mut(), // TODO: We can pass queue limits right here
core::ptr::null_mut(),
lock_class_key.as_ptr(),
)
})?;

const TABLE: bindings::block_device_operations = bindings::block_device_operations {
submit_bio: None,
open: None,
release: None,
ioctl: None,
compat_ioctl: None,
check_events: None,
unlock_native_capacity: None,
getgeo: None,
set_read_only: None,
swap_slot_free_notify: None,
report_zones: None,
devnode: None,
alternative_gpt_sector: None,
get_unique_id: None,
// TODO: Set to THIS_MODULE. Waiting for const_refs_to_static feature to
// be merged (unstable in rustc 1.78 which is staged for linux 6.10)
// https://github.com/rust-lang/rust/issues/119618
owner: core::ptr::null_mut(),
pr_ops: core::ptr::null_mut(),
free_disk: None,
poll_bio: None,
};

// SAFETY: `gendisk` is a valid pointer as we initialized it above
unsafe { (*gendisk).fops = &TABLE };

let mut raw_writer = RawWriter::from_array(
// SAFETY: `gendisk` points to a valid and initialized instance. We
// have exclusive access, since the disk is not added to the VFS
// yet.
unsafe { &mut (*gendisk).disk_name },
)?;
raw_writer.write_fmt(name)?;
raw_writer.write_char('\0')?;

// SAFETY: `gendisk` points to a valid and initialized instance of
// `struct gendisk`. We have exclusive access, so we cannot race.
unsafe {
bindings::blk_queue_logical_block_size((*gendisk).queue, self.logical_block_size)
};

// SAFETY: `gendisk` points to a valid and initialized instance of
// `struct gendisk`. We have exclusive access, so we cannot race.
unsafe {
bindings::blk_queue_physical_block_size((*gendisk).queue, self.physical_block_size)
};

// SAFETY: `gendisk` points to a valid and initialized instance of
// `struct gendisk`. `set_capacity` takes a lock to synchronize this
// operation, so we will not race.
unsafe { bindings::set_capacity(gendisk, self.capacity_sectors) };

if !self.rotational {
// SAFETY: `gendisk` points to a valid and initialized instance of
// `struct gendisk`. This operation uses a relaxed atomic bit flip
// operation, so there is no race on this field.
unsafe { bindings::blk_queue_flag_set(bindings::QUEUE_FLAG_NONROT, (*gendisk).queue) };
} else {
// SAFETY: `gendisk` points to a valid and initialized instance of
// `struct gendisk`. This operation uses a relaxed atomic bit flip
// operation, so there is no race on this field.
unsafe {
bindings::blk_queue_flag_clear(bindings::QUEUE_FLAG_NONROT, (*gendisk).queue)
};
}

crate::error::to_result(
// SAFETY: `gendisk` points to a valid and initialized instance of
// `struct gendisk`.
unsafe {
bindings::device_add_disk(core::ptr::null_mut(), gendisk, core::ptr::null_mut())
},
)?;

// INVARIANT: `gendisk` was initialized above.
// INVARIANT: `gendisk` was added to the VFS via `device_add_disk` above.
Ok(GenDisk {
_tagset: tagset,
gendisk,
})
}
}

/// A generic block device.
///
/// Instances are created by `GenDiskBuilder::build`; dropping one calls
/// `del_gendisk` on the underlying disk.
///
/// # Invariants
///
/// - `gendisk` must always point to an initialized and valid `struct gendisk`.
/// - `gendisk` was added to the VFS through a call to
///   `bindings::device_add_disk`.
pub struct GenDisk<T: Operations> {
/// Handle to the tag set the disk was allocated from. It is not read in the
/// code shown here; holding the `Arc` keeps the tag set alive for as long as
/// this `GenDisk` exists.
_tagset: Arc<TagSet<T>>,
/// Raw pointer to the C `struct gendisk` backing this disk.
gendisk: *mut bindings::gendisk,
}

// SAFETY: `GenDisk` is an owned pointer to a `struct gendisk` and an `Arc` to a
// `TagSet`. It is safe to send this to other threads as long as `T` is `Send`.
unsafe impl<T: Operations + Send> Send for GenDisk<T> {}

impl<T: Operations> Drop for GenDisk<T> {
    /// Remove the disk from the VFS by calling `del_gendisk` on it.
    fn drop(&mut self) {
        let disk = self.gendisk;

        // SAFETY: The type invariants guarantee that `disk` points to a valid,
        // initialized `struct gendisk` which was previously added to the VFS.
        unsafe { bindings::del_gendisk(disk) };
    }
}
Loading

0 comments on commit 3253aba

Please sign in to comment.