Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vmm: support VFIO device #99

Merged
merged 4 commits into from
Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 30 additions & 40 deletions vmm/sandbox/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@ limitations under the License.
*/

use std::{
os::unix::io::{IntoRawFd, RawFd},
io::{BufRead, BufReader, Write},
os::unix::{
io::{IntoRawFd, RawFd},
net::UnixStream,
},
time::Duration,
};

Expand All @@ -30,17 +34,12 @@ use nix::{
time::{clock_gettime, ClockId},
unistd::close,
};
use tokio::{
io::{AsyncReadExt, AsyncWriteExt},
net::UnixStream,
time::timeout,
};
use tokio::time::timeout;
use ttrpc::{context::with_timeout, r#async::Client};
use vmm_common::api::{sandbox::*, sandbox_ttrpc::SandboxServiceClient};

use crate::network::{NetworkInterface, Route};

const HVSOCK_RETRY_TIMEOUT_IN_MS: u64 = 10;
const TIME_SYNC_PERIOD: u64 = 60;
const TIME_DIFF_TOLERANCE_IN_MS: u64 = 10;

Expand Down Expand Up @@ -68,7 +67,11 @@ async fn new_ttrpc_client(address: &str) -> Result<Client> {

let client = timeout(Duration::from_secs(ctx_timeout), fut)
.await
.map_err(|_| anyhow!("{}s timeout connecting socket: {}", ctx_timeout, last_err))?;
.map_err(|_| {
let e = anyhow!("{}s timeout connecting socket: {}", ctx_timeout, last_err);
error!("{}", e);
e
})?;
Ok(client)
}

Expand Down Expand Up @@ -162,41 +165,28 @@ async fn connect_to_hvsocket(address: &str) -> Result<RawFd> {
if v.len() < 2 {
return Err(anyhow!("hvsock address {} should not less than 2", address).into());
}
(v[0], v[1])
(v[0].to_string(), v[1].to_string())
};

let fut = async {
let mut stream = UnixStream::connect(addr).await?;

match stream.write(format!("CONNECT {}\n", port).as_bytes()).await {
Ok(_) => {
let mut buf = [0; 4096];
match stream.read(&mut buf).await {
Ok(0) => Err(anyhow!("stream closed")),
Ok(n) => {
if String::from_utf8(buf[..n].to_vec())
.unwrap_or_default()
.contains("OK")
{
return Ok(stream.into_std()?.into_raw_fd());
}
Err(anyhow!("failed to connect"))
}
Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => {
Err(anyhow!("{}", e))
}
Err(e) => Err(anyhow!("failed to read from hvsock: {}", e)),
}
}
Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => Err(anyhow!("{}", e)),
Err(e) => Err(anyhow!("failed to write CONNECT to hvsock: {}", e)),
tokio::task::spawn_blocking(move || {
let mut stream =
UnixStream::connect(&addr).map_err(|e| anyhow!("failed to connect hvsock: {}", e))?;
stream
.write_all(format!("CONNECT {}\n", port).as_bytes())
.map_err(|e| anyhow!("hvsock connected but failed to write CONNECT: {}", e))?;

let mut response = String::new();
BufReader::new(&stream)
.read_line(&mut response)
.map_err(|e| anyhow!("CONNECT sent but failed to get response: {}", e))?;
if response.starts_with("OK") {
Ok(stream.into_raw_fd())
} else {
Err(anyhow!("CONNECT sent but response is not OK: {}", response).into())
}
.map_err(Error::Other)
};

timeout(Duration::from_millis(HVSOCK_RETRY_TIMEOUT_IN_MS), fut)
.await
.map_err(|_| anyhow!("hvsock retry {}ms timeout", HVSOCK_RETRY_TIMEOUT_IN_MS))?
})
.await
.map_err(|e| anyhow!("failed to spawn blocking task: {}", e))?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the timeout removed when waiting?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have three kinds of "connectiing" method for unix socket, vsock and hvsock. All the methonds only handle the connecting, leaving the timeout be handled in the upper caller, e.g. new_ttrpc_client.

}

pub fn unix_sock(r#abstract: bool, socket_path: &str) -> Result<UnixAddr> {
Expand Down
14 changes: 7 additions & 7 deletions vmm/sandbox/src/cloud_hypervisor/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,14 @@ limitations under the License.

use std::{
os::unix::net::UnixStream,
path::Path,
thread::sleep,
time::{Duration, SystemTime},
};

use anyhow::anyhow;
use api_client::{simple_api_command, simple_api_full_command_with_fds_and_response};
use containerd_sandbox::error::Result;
use log::{error, trace};
use log::error;

use crate::{
cloud_hypervisor::devices::{block::DiskConfig, AddDeviceResponse, RemoveDeviceRequest},
Expand All @@ -38,25 +37,26 @@ pub struct ChClient {
}

impl ChClient {
pub fn new<P: AsRef<Path>>(socket_path: P) -> Result<Self> {
pub async fn new(socket_path: String) -> Result<Self> {
let start_time = SystemTime::now();
loop {
tokio::task::spawn_blocking(move || loop {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we have to change it to async?

Copy link
Member Author

@Burning1020 Burning1020 Jan 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have called sync function UnixStream::connect(), thus it's better put it in a spawn_blocking

match UnixStream::connect(&socket_path) {
Ok(socket) => {
return Ok(Self { socket });
}
Err(e) => {
trace!("failed to create client: {:?}", e);
if start_time.elapsed().unwrap().as_secs()
> CLOUD_HYPERVISOR_START_TIMEOUT_IN_SEC
{
error!("failed to create client: {:?}", e);
error!("failed to connect api server: {:?}", e);
return Err(anyhow!("timeout connect client, {}", e).into());
}
sleep(Duration::from_millis(10));
}
}
}
})
.await
.map_err(|e| anyhow!("failed to spawn a task {}", e))?
}

pub fn hot_attach(&mut self, device_info: DeviceInfo) -> Result<String> {
Expand Down
1 change: 1 addition & 0 deletions vmm/sandbox/src/cloud_hypervisor/devices/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pub mod device;
pub mod fs;
pub mod pmem;
pub mod rng;
pub mod vfio;
pub mod virtio_net;
pub mod vsock;

Expand Down
54 changes: 54 additions & 0 deletions vmm/sandbox/src/cloud_hypervisor/devices/vfio.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
Copyright 2022 The Kuasar Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

use sandbox_derive::CmdLineParams;

const VFIO_DEVICE_SYSFS_PATH: &str = "/sys/bus/pci/devices";

#[derive(CmdLineParams, Debug, Clone)]
#[params("device")]
pub struct VfioDevice {
#[property(ignore)]
pub id: String,
pub(crate) path: String,
}

impl_device_no_bus!(VfioDevice);

impl VfioDevice {
pub fn new(id: &str, bdf: &str) -> Self {
Self {
id: id.to_string(),
path: format!("{}/{}", VFIO_DEVICE_SYSFS_PATH, bdf),
}
}
}

#[cfg(test)]
mod tests {
use crate::{cloud_hypervisor::devices::vfio::VfioDevice, param::ToParams};

#[test]
fn test_attr() {
let device = VfioDevice::new("", "0000:b4:05.1");
let params = device.to_params();
let property = params.get(0).unwrap();
assert_eq!(
property.get("path").unwrap(),
"/sys/bus/pci/devices/0000:b4:05.1"
);
}
}
15 changes: 9 additions & 6 deletions vmm/sandbox/src/cloud_hypervisor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ use crate::{
cloud_hypervisor::{
client::ChClient,
config::{CloudHypervisorConfig, CloudHypervisorVMConfig, VirtiofsdConfig},
devices::{block::Disk, virtio_net::VirtioNetDevice, CloudHypervisorDevice},
devices::{
block::Disk, vfio::VfioDevice, virtio_net::VirtioNetDevice, CloudHypervisorDevice,
},
},
device::{BusType, DeviceInfo},
impl_recoverable, load_config,
Expand Down Expand Up @@ -110,7 +112,7 @@ impl CloudHypervisorVM {
}

async fn create_client(&self) -> Result<ChClient> {
ChClient::new(&self.config.api_socket)
ChClient::new(self.config.api_socket.to_string()).await
}

fn get_client(&mut self) -> Result<&mut ChClient> {
Expand All @@ -125,7 +127,7 @@ impl CloudHypervisorVM {
let mut cmd = tokio::process::Command::new(&self.virtiofsd_config.path);
cmd.args(params.as_slice());
debug!("start virtiofsd with cmdline: {:?}", cmd);
set_cmd_netns(&mut cmd, &self.netns)?;
set_cmd_netns(&mut cmd, self.netns.to_string())?;
cmd.stderr(Stdio::piped());
cmd.stdout(Stdio::piped());
let child = cmd
Expand Down Expand Up @@ -164,7 +166,7 @@ impl VM for CloudHypervisorVM {
cmd.args(params.as_slice());

set_cmd_fd(&mut cmd, self.fds.to_vec())?;
set_cmd_netns(&mut cmd, &self.netns)?;
set_cmd_netns(&mut cmd, self.netns.to_string())?;
cmd.stdout(Stdio::piped());
cmd.stderr(Stdio::piped());
debug!("start cloud hypervisor with cmdline: {:?}", cmd);
Expand Down Expand Up @@ -221,8 +223,9 @@ impl VM for CloudHypervisorVM {
);
self.add_device(device);
}
DeviceInfo::Physical(_vfio_info) => {
todo!()
DeviceInfo::Physical(vfio_info) => {
let device = VfioDevice::new(&vfio_info.id, &vfio_info.bdf);
self.add_device(device);
}
DeviceInfo::VhostUser(_vhost_user_info) => {
todo!()
Expand Down
29 changes: 25 additions & 4 deletions vmm/sandbox/src/network/link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ pub struct NetworkInterface {
impl NetworkInterface {
pub async fn parse_from_message(
msg: LinkMessage,
_netns: &str,
netns: &str,
queue: u32,
handle: &Handle,
) -> Result<Self> {
Expand Down Expand Up @@ -266,6 +266,20 @@ impl NetworkInterface {
}
}
}
// find the pci device for unknown type interface, maybe it is a physical interface.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it certain that all physical interface with unknown type? Or is there any possibility that any other inferface with unknown type, will it cause any problem?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it certain that all physical interface with unknown type? Or is there any possibility that any other inferface with unknown type, will it cause any problem?

We checked the netlink.LinkDeserialize() golang code, every type does have a correspond struct, but some of the structs do not be used, so we centralize them by Unknown type.

if let LinkType::Unkonwn = intf.r#type {
// only search those with ip addresses
if !intf.ip_addresses.is_empty() {
let if_name = intf.name.to_string();
let bdf = if !netns.is_empty() {
run_in_new_netns(netns, move || get_bdf_for_eth(&if_name)).await??
abel-von marked this conversation as resolved.
Show resolved Hide resolved
} else {
get_bdf_for_eth(&if_name)?
};
let driver = get_pci_driver(&bdf).await?;
intf.r#type = LinkType::Physical(bdf, driver);
}
}
Ok(intf)
}

Expand Down Expand Up @@ -411,7 +425,6 @@ impl NetworkInterface {
}
}

#[allow(dead_code)]
fn get_bdf_for_eth(if_name: &str) -> Result<String> {
if if_name.len() > 16 {
return Err(anyhow!("the interface name length is larger than 16").into());
Expand Down Expand Up @@ -544,10 +557,11 @@ fn create_tap_device(tap_name: &str, mut queue: u32) -> Result<Vec<OwnedFd>> {
Ok(fds)
}

#[allow(dead_code)]
async fn get_pci_driver(bdf: &str) -> Result<String> {
let driver_path = format!("/sys/bus/pci/devices/{}/driver", bdf);
let driver_dest = tokio::fs::read_link(driver_path).await?;
let driver_dest = tokio::fs::read_link(&driver_path)
.await
.map_err(|e| anyhow!("fail to readlink of {} : {}", driver_path, e))?;
let file_name = driver_dest.file_name().ok_or(anyhow!(
"failed to get file name from driver path {:?}",
driver_dest
Expand All @@ -560,14 +574,21 @@ async fn get_pci_driver(bdf: &str) -> Result<String> {
}

async fn bind_device_to_driver(driver: &str, bdf: &str) -> Result<()> {
// 1. Switch the device driver
let driver_override_path = format!("/sys/bus/pci/devices/{}/driver_override", bdf);
write_file_async(&driver_override_path, driver).await?;

// 2. Unbind the device from its native driver
let unbind_path = format!("/sys/bus/pci/devices/{}/driver/unbind", bdf);
if Path::new(&*unbind_path).exists() {
write_file_async(&unbind_path, bdf).await?;
}

// 3. Probe driver for device
let probe_path = "/sys/bus/pci/drivers_probe";
write_file_async(probe_path, bdf).await?;

// 4. Check the result
let driver_link = format!("/sys/bus/pci/devices/{}/driver", bdf);
let driver_path = tokio::fs::read_link(&*driver_link).await?;

Expand Down
19 changes: 15 additions & 4 deletions vmm/sandbox/src/network/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use std::{fmt::Debug, os::unix::prelude::AsRawFd, path::Path};
use anyhow::anyhow;
use containerd_sandbox::error::Result;
use futures_util::TryStreamExt;
use log::{debug, error};
use log::{debug, error, warn};
use nix::{
fcntl::OFlag,
sched::{setns, CloneFlags},
Expand Down Expand Up @@ -64,9 +64,20 @@ impl Network {
let mut links = handle.link().get().execute();
let mut intfs = vec![];
while let Some(msg) = links.try_next().await.map_err(|e| anyhow!(e))? {
let network_interface =
NetworkInterface::parse_from_message(msg, &config.netns, config.queue, &handle)
.await?;
let network_interface = match NetworkInterface::parse_from_message(
msg,
&config.netns,
config.queue,
&handle,
)
.await
{
Ok(interface) => interface,
Err(e) => {
warn!("failed to parse network interface: {}, ignore it", e);
continue;
}
};
if let LinkType::Loopback = network_interface.r#type {
continue;
}
Expand Down
4 changes: 4 additions & 0 deletions vmm/sandbox/src/sandbox.rs
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,10 @@ where

async fn stop(&mut self, force: bool) -> Result<()> {
match self.status {
// If sandbox is created but not running, no need to stop.
SandboxStatus::Created => {
abel-von marked this conversation as resolved.
Show resolved Hide resolved
return Ok(());
}
SandboxStatus::Running(_) => {}
SandboxStatus::Stopped(_, _) => {
return Ok(());
Expand Down
Loading
Loading