forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 435
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
rust: block: introduce
kernel::block::mq
module
Add initial abstractions for working with blk-mq. This patch is a maintained, refactored subset of code originally published by Wedson Almeida Filho <wedsonaf@gmail.com> [1]. [1] https://github.com/wedsonaf/linux/tree/f2cfd2fe0e2ca4e90994f96afe268bbd4382a891/rust/kernel/blk/mq.rs Cc: Wedson Almeida Filho <wedsonaf@gmail.com> Signed-off-by: Andreas Hindborg <a.hindborg@samsung.com> Reviewed-by: Benno Lossin <benno.lossin@proton.me> Link: https://lore.kernel.org/r/20240611114551.228679-2-nmi@metaspace.dk Signed-off-by: Jens Axboe <axboe@kernel.dk>
- Loading branch information
Showing
11 changed files
with
984 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
// SPDX-License-Identifier: GPL-2.0 | ||
|
||
//! Types for working with the block layer. | ||
pub mod mq; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
// SPDX-License-Identifier: GPL-2.0 | ||
|
||
//! This module provides types for implementing block drivers that interface the | ||
//! blk-mq subsystem. | ||
//! | ||
//! To implement a block device driver, a Rust module must do the following: | ||
//! | ||
//! - Implement [`Operations`] for a type `T`. | ||
//! - Create a [`TagSet<T>`]. | ||
//! - Create a [`GenDisk<T>`], via the [`GenDiskBuilder`]. | ||
//! - Add the disk to the system by calling [`GenDiskBuilder::build`] passing in | ||
//! the `TagSet` reference. | ||
//! | ||
//! The types available in this module that have direct C counterparts are: | ||
//! | ||
//! - The [`TagSet`] type that abstracts the C type `struct blk_mq_tag_set`. | ||
//! - The [`GenDisk`] type that abstracts the C type `struct gendisk`. | ||
//! - The [`Request`] type that abstracts the C type `struct request`. | ||
//! | ||
//! The kernel will interface with the block device driver by calling the method | ||
//! implementations of the `Operations` trait. | ||
//! | ||
//! IO requests are passed to the driver as [`kernel::types::ARef<Request>`] | ||
//! instances. The `Request` type is a wrapper around the C `struct request`. | ||
//! The driver must mark end of processing by calling one of the | ||
//! `Request::end` methods. Failure to do so can lead to deadlock or timeout | ||
//! errors. Please note that the C function `blk_mq_start_request` is implicitly | ||
//! called when the request is queued with the driver. | ||
//! | ||
//! The `TagSet` is responsible for creating and maintaining a mapping between | ||
//! `Request`s and integer ids as well as carrying a pointer to the vtable | ||
//! generated by `Operations`. This mapping is useful for associating | ||
//! completions from hardware with the correct `Request` instance. The `TagSet` | ||
//! determines the maximum queue depth by setting the number of `Request` | ||
//! instances available to the driver, and it determines the number of queues to | ||
//! instantiate for the driver. If possible, a driver should allocate one queue | ||
//! per core, to keep queue data local to a core. | ||
//! | ||
//! One `TagSet` instance can be shared between multiple `GenDisk` instances. | ||
//! This can be useful when implementing drivers where one piece of hardware | ||
//! with one set of IO resources is represented to the user as multiple disks. | ||
//! | ||
//! One significant difference between block device drivers implemented with | ||
//! these Rust abstractions and drivers implemented in C, is that the Rust | ||
//! drivers have to own a reference count on the `Request` type when the IO is | ||
//! in flight. This is to ensure that the C `struct request` instances backing | ||
//! the Rust `Request` instances are live while the Rust driver holds a | ||
//! reference to the `Request`. In addition, the conversion of an integer tag to | ||
//! a `Request` via the `TagSet` would not be sound without this bookkeeping. | ||
//! | ||
//! [`GenDisk`]: gen_disk::GenDisk | ||
//! [`GenDisk<T>`]: gen_disk::GenDisk | ||
//! [`GenDiskBuilder`]: gen_disk::GenDiskBuilder | ||
//! [`GenDiskBuilder::build`]: gen_disk::GenDiskBuilder::build | ||
//! | ||
//! # Example | ||
//! | ||
//! ```rust | ||
//! use kernel::{ | ||
//! alloc::flags, | ||
//! block::mq::*, | ||
//! new_mutex, | ||
//! prelude::*, | ||
//! sync::{Arc, Mutex}, | ||
//! types::{ARef, ForeignOwnable}, | ||
//! }; | ||
//! | ||
//! struct MyBlkDevice; | ||
//! | ||
//! #[vtable] | ||
//! impl Operations for MyBlkDevice { | ||
//! | ||
//! fn queue_rq(rq: ARef<Request<Self>>, _is_last: bool) -> Result { | ||
//! Request::end_ok(rq); | ||
//! Ok(()) | ||
//! } | ||
//! | ||
//! fn commit_rqs() {} | ||
//! } | ||
//! | ||
//! let tagset: Arc<TagSet<MyBlkDevice>> = | ||
//! Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?; | ||
//! let mut disk = gen_disk::GenDiskBuilder::new() | ||
//! .capacity_sectors(4096) | ||
//! .build(format_args!("myblk"), tagset)?; | ||
//! | ||
//! # Ok::<(), kernel::error::Error>(()) | ||
//! ``` | ||
pub mod gen_disk; | ||
mod operations; | ||
mod raw_writer; | ||
mod request; | ||
mod tag_set; | ||
|
||
pub use operations::Operations; | ||
pub use request::Request; | ||
pub use tag_set::TagSet; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,215 @@ | ||
// SPDX-License-Identifier: GPL-2.0 | ||
|
||
//! Generic disk abstraction. | ||
//! | ||
//! C header: [`include/linux/blkdev.h`](srctree/include/linux/blkdev.h) | ||
//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h) | ||
use crate::block::mq::{raw_writer::RawWriter, Operations, TagSet}; | ||
use crate::error; | ||
use crate::{bindings, error::from_err_ptr, error::Result, sync::Arc}; | ||
use core::fmt::{self, Write}; | ||
|
||
/// A builder for [`GenDisk`]. | ||
/// | ||
/// Use this struct to configure and add a new [`GenDisk`] to the VFS. | ||
/// | ||
/// Every setter consumes the builder and hands it back, so calls can be | ||
/// chained before finishing with [`GenDiskBuilder::build`]. | ||
pub struct GenDiskBuilder { | ||
/// Whether the device is rotational media. When `false`, `build` sets | ||
/// `QUEUE_FLAG_NONROT` on the request queue; when `true`, it clears it. | ||
rotational: bool, | ||
/// Logical block size in bytes, handed to `blk_queue_logical_block_size`. | ||
logical_block_size: u32, | ||
/// Physical block size in bytes, handed to `blk_queue_physical_block_size`. | ||
physical_block_size: u32, | ||
/// Capacity of the device in 512 byte sectors, handed to `set_capacity`. | ||
capacity_sectors: u64, | ||
} | ||
|
||
impl Default for GenDiskBuilder { | ||
fn default() -> Self { | ||
Self { | ||
rotational: false, | ||
logical_block_size: bindings::PAGE_SIZE as u32, | ||
physical_block_size: bindings::PAGE_SIZE as u32, | ||
capacity_sectors: 0, | ||
} | ||
} | ||
} | ||
|
||
impl GenDiskBuilder { | ||
/// Create a new instance. | ||
pub fn new() -> Self { | ||
Self::default() | ||
} | ||
|
||
/// Set the rotational media attribute for the device to be built. | ||
pub fn rotational(mut self, rotational: bool) -> Self { | ||
self.rotational = rotational; | ||
self | ||
} | ||
|
||
/// Validate block size by verifying that it is between 512 and `PAGE_SIZE`, | ||
/// and that it is a power of two. | ||
fn validate_block_size(size: u32) -> Result<()> { | ||
if !(512..=bindings::PAGE_SIZE as u32).contains(&size) || !size.is_power_of_two() { | ||
Err(error::code::EINVAL) | ||
} else { | ||
Ok(()) | ||
} | ||
} | ||
|
||
/// Set the logical block size of the device to be built. | ||
/// | ||
/// This method will check that block size is a power of two and between 512 | ||
/// and 4096. If not, an error is returned and the block size is not set. | ||
/// | ||
/// This is the smallest unit the storage device can address. It is | ||
/// typically 4096 bytes. | ||
pub fn logical_block_size(mut self, block_size: u32) -> Result<Self> { | ||
Self::validate_block_size(block_size)?; | ||
self.logical_block_size = block_size; | ||
Ok(self) | ||
} | ||
|
||
/// Set the physical block size of the device to be built. | ||
/// | ||
/// This method will check that block size is a power of two and between 512 | ||
/// and 4096. If not, an error is returned and the block size is not set. | ||
/// | ||
/// This is the smallest unit a physical storage device can write | ||
/// atomically. It is usually the same as the logical block size but may be | ||
/// bigger. One example is SATA drives with 4096 byte physical block size | ||
/// that expose a 512 byte logical block size to the operating system. | ||
pub fn physical_block_size(mut self, block_size: u32) -> Result<Self> { | ||
Self::validate_block_size(block_size)?; | ||
self.physical_block_size = block_size; | ||
Ok(self) | ||
} | ||
|
||
/// Set the capacity of the device to be built, in sectors (512 bytes). | ||
pub fn capacity_sectors(mut self, capacity: u64) -> Self { | ||
self.capacity_sectors = capacity; | ||
self | ||
} | ||
|
||
/// Build a new `GenDisk` and add it to the VFS. | ||
pub fn build<T: Operations>( | ||
self, | ||
name: fmt::Arguments<'_>, | ||
tagset: Arc<TagSet<T>>, | ||
) -> Result<GenDisk<T>> { | ||
let lock_class_key = crate::sync::LockClassKey::new(); | ||
|
||
// SAFETY: `tagset.raw_tag_set()` points to a valid and initialized tag set | ||
let gendisk = from_err_ptr(unsafe { | ||
bindings::__blk_mq_alloc_disk( | ||
tagset.raw_tag_set(), | ||
core::ptr::null_mut(), // TODO: We can pass queue limits right here | ||
core::ptr::null_mut(), | ||
lock_class_key.as_ptr(), | ||
) | ||
})?; | ||
|
||
const TABLE: bindings::block_device_operations = bindings::block_device_operations { | ||
submit_bio: None, | ||
open: None, | ||
release: None, | ||
ioctl: None, | ||
compat_ioctl: None, | ||
check_events: None, | ||
unlock_native_capacity: None, | ||
getgeo: None, | ||
set_read_only: None, | ||
swap_slot_free_notify: None, | ||
report_zones: None, | ||
devnode: None, | ||
alternative_gpt_sector: None, | ||
get_unique_id: None, | ||
// TODO: Set to THIS_MODULE. Waiting for const_refs_to_static feature to | ||
// be merged (unstable in rustc 1.78 which is staged for linux 6.10) | ||
// https://github.com/rust-lang/rust/issues/119618 | ||
owner: core::ptr::null_mut(), | ||
pr_ops: core::ptr::null_mut(), | ||
free_disk: None, | ||
poll_bio: None, | ||
}; | ||
|
||
// SAFETY: `gendisk` is a valid pointer as we initialized it above | ||
unsafe { (*gendisk).fops = &TABLE }; | ||
|
||
let mut raw_writer = RawWriter::from_array( | ||
// SAFETY: `gendisk` points to a valid and initialized instance. We | ||
// have exclusive access, since the disk is not added to the VFS | ||
// yet. | ||
unsafe { &mut (*gendisk).disk_name }, | ||
)?; | ||
raw_writer.write_fmt(name)?; | ||
raw_writer.write_char('\0')?; | ||
|
||
// SAFETY: `gendisk` points to a valid and initialized instance of | ||
// `struct gendisk`. We have exclusive access, so we cannot race. | ||
unsafe { | ||
bindings::blk_queue_logical_block_size((*gendisk).queue, self.logical_block_size) | ||
}; | ||
|
||
// SAFETY: `gendisk` points to a valid and initialized instance of | ||
// `struct gendisk`. We have exclusive access, so we cannot race. | ||
unsafe { | ||
bindings::blk_queue_physical_block_size((*gendisk).queue, self.physical_block_size) | ||
}; | ||
|
||
// SAFETY: `gendisk` points to a valid and initialized instance of | ||
// `struct gendisk`. `set_capacity` takes a lock to synchronize this | ||
// operation, so we will not race. | ||
unsafe { bindings::set_capacity(gendisk, self.capacity_sectors) }; | ||
|
||
if !self.rotational { | ||
// SAFETY: `gendisk` points to a valid and initialized instance of | ||
// `struct gendisk`. This operation uses a relaxed atomic bit flip | ||
// operation, so there is no race on this field. | ||
unsafe { bindings::blk_queue_flag_set(bindings::QUEUE_FLAG_NONROT, (*gendisk).queue) }; | ||
} else { | ||
// SAFETY: `gendisk` points to a valid and initialized instance of | ||
// `struct gendisk`. This operation uses a relaxed atomic bit flip | ||
// operation, so there is no race on this field. | ||
unsafe { | ||
bindings::blk_queue_flag_clear(bindings::QUEUE_FLAG_NONROT, (*gendisk).queue) | ||
}; | ||
} | ||
|
||
crate::error::to_result( | ||
// SAFETY: `gendisk` points to a valid and initialized instance of | ||
// `struct gendisk`. | ||
unsafe { | ||
bindings::device_add_disk(core::ptr::null_mut(), gendisk, core::ptr::null_mut()) | ||
}, | ||
)?; | ||
|
||
// INVARIANT: `gendisk` was initialized above. | ||
// INVARIANT: `gendisk` was added to the VFS via `device_add_disk` above. | ||
Ok(GenDisk { | ||
_tagset: tagset, | ||
gendisk, | ||
}) | ||
} | ||
} | ||
|
||
/// A generic block device. | ||
/// | ||
/// Instances are created by [`GenDiskBuilder::build`]; dropping an instance | ||
/// calls `bindings::del_gendisk` on the wrapped disk. | ||
/// | ||
/// # Invariants | ||
/// | ||
/// - `gendisk` must always point to an initialized and valid `struct gendisk`. | ||
/// - `gendisk` was added to the VFS through a call to | ||
/// `bindings::device_add_disk`. | ||
pub struct GenDisk<T: Operations> { | ||
/// The tag set whose raw pointer was passed to `__blk_mq_alloc_disk`. It is | ||
/// never read; presumably it is held only to keep the tag set alive for as | ||
/// long as the disk exists. | ||
_tagset: Arc<TagSet<T>>, | ||
/// The C disk returned by `__blk_mq_alloc_disk`. | ||
gendisk: *mut bindings::gendisk, | ||
} | ||
|
||
// SAFETY: `GenDisk` is an owned pointer to a `struct gendisk` and an `Arc` to a | ||
// `TagSet`. It is safe to send this to other threads as long as T is Send. | ||
unsafe impl<T: Operations + Send> Send for GenDisk<T> {} | ||
|
||
impl<T: Operations> Drop for GenDisk<T> { | ||
fn drop(&mut self) { | ||
// SAFETY: By type invariant, `self.gendisk` points to a valid and | ||
// initialized instance of `struct gendisk`, and it was previously added | ||
// to the VFS. | ||
unsafe { bindings::del_gendisk(self.gendisk) }; | ||
} | ||
} |
Oops, something went wrong.