Skip to content

Commit

Permalink
Add BorrowDatum for unsizing borrows of datums (#1891)
Browse files Browse the repository at this point in the history
Add a trait that allows borrowing Datums from various places, like
arguments and arrays, instead of always taking them by-value. This is
most important for unsizing borrows, as the sized kind are a simple
`.cast()` away.

The motivating rationale is to enable new APIs in the near future like
FlatArray and Text. It also brings in some more uniformity because of
the blanket-impl of ArgAbi for `&T`. This means that various types that
_are_ pass-by-reference may now actually be taken by reference from
their Datum, instead of copying them into the Rust function! This is
more important for unsized types, but for various reasons, right now it
only takes over the ArgAbi of one unsized type: CStr.
  • Loading branch information
workingjubilee authored Oct 1, 2024
1 parent f07f840 commit 901401c
Show file tree
Hide file tree
Showing 7 changed files with 261 additions and 5 deletions.
84 changes: 84 additions & 0 deletions pgrx-tests/src/tests/borrow_datum.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
use pgrx::prelude::*;

// BorrowDatum only describes something as a valid argument, but every &T has a U we CAN return,
// going from &T as arg to T as ret type. We also don't necessarily have SPI support for each type.
// Thus we don't *exactly* reuse the roundtrip test macro.

macro_rules! clonetrip {
($fname:ident, $tname:ident, &$lt:lifetime $rtype:ty, &$expected:expr) => {
#[pg_extern]
fn $fname<$lt>(i: &$lt $rtype) -> $rtype {
i.clone()
}

clonetrip_test!($fname, $tname, &'_ $rtype, $expected);
};
($fname:ident, $tname:ident, &$lt:lifetime $ref_ty:ty => $own_ty:ty, $test:expr, $value:expr) => {
#[pg_extern]
fn $fname(i: &$ref_ty) -> $own_ty {
i.into()
}

clonetrip_test!($fname, $tname, &'_ $ref_ty => $own_ty, $test, $value);
};
}

macro_rules! clonetrip_test {
($fname:ident, $tname:ident, &'_ $rtype:ty, $expected:expr) => {
#[pg_test]
fn $tname() -> Result<(), Box<dyn std::error::Error>> {
let expected: $rtype = $expected;
let result: $rtype = Spi::get_one_with_args(
&format!("SELECT {}($1)", stringify!(tests.$fname)),
vec![(PgOid::from(<$rtype>::type_oid()), expected.into_datum())],
)?
.unwrap();

assert_eq!(&$expected, &result);
Ok(())
}
};
($fname:ident, $tname:ident, $ref_ty:ty => $own_ty:ty, $test:expr, $value:expr) => {
#[pg_test]
fn $tname() -> Result<(), Box<dyn std::error::Error>> {
let value: $own_ty = $value;
let result: $own_ty = Spi::get_one_with_args(
&format!("SELECT {}($1)", stringify!(tests.$fname)),
vec![(PgOid::from(<$ref_ty>::type_oid()), value.into_datum())],
)?
.unwrap();

assert_eq!($test, &*result);
Ok(())
}
};
}

#[cfg(any(test, feature = "pg_test"))]
#[pg_schema]
mod tests {
use super::*;
#[allow(unused)]
use crate as pgrx_tests;
use std::ffi::{CStr, CString};

// Exercising BorrowDatum impls
clonetrip!(clone_bool, test_clone_bool, &'a bool, &false);
clonetrip!(clone_i8, test_clone_i8, &'a i8, &i8::MIN);
clonetrip!(clone_i16, test_clone_i16, &'a i16, &i16::MIN);
clonetrip!(clone_i32, test_clone_i32, &'a i32, &-1i32);
clonetrip!(clone_f64, test_clone_f64, &'a f64, &f64::NEG_INFINITY);
clonetrip!(
clone_point,
test_clone_point,
&'a pg_sys::Point,
&pg_sys::Point { x: -1.0, y: f64::INFINITY }
);
clonetrip!(clone_str, test_clone_str, &'a CStr => CString, c"cee string", CString::from(c"cee string"));
clonetrip!(
clone_oid,
test_clone_oid,
&'a pg_sys::Oid,
&pg_sys::Oid::from(pg_sys::BuiltinOid::RECORDOID)
);
}
1 change: 1 addition & 0 deletions pgrx-tests/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ mod attributes_tests;
mod bgworker_tests;
#[cfg(feature = "cshim")]
mod bindings_of_inline_fn_tests;
mod borrow_datum;
mod bytea_tests;
mod cfg_tests;
mod complex;
Expand Down
36 changes: 34 additions & 2 deletions pgrx/src/callconv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@
#![deny(unsafe_op_in_unsafe_fn)]
//! Helper implementations for returning sets and tables from `#[pg_extern]`-style functions
use crate::datum::Datum;
use crate::datum::{
AnyArray, AnyElement, AnyNumeric, Date, FromDatum, Inet, Internal, Interval, IntoDatum, Json,
JsonB, Numeric, PgVarlena, Time, TimeWithTimeZone, Timestamp, TimestampWithTimeZone,
UnboxDatum, Uuid,
};
use crate::datum::{BorrowDatum, Datum};
use crate::datum::{Range, RangeSubType};
use crate::heap_tuple::PgHeapTuple;
use crate::layout::PassBy;
use crate::nullable::Nullable;
use crate::pg_sys;
use crate::pgbox::*;
Expand Down Expand Up @@ -265,7 +266,38 @@ argue_from_datum! { 'fcx; Date, Interval, Time, TimeWithTimeZone, Timestamp, Tim
argue_from_datum! { 'fcx; AnyArray, AnyElement, AnyNumeric }
argue_from_datum! { 'fcx; Inet, Internal, Json, JsonB, Uuid, PgRelation }
argue_from_datum! { 'fcx; pg_sys::BOX, pg_sys::ItemPointerData, pg_sys::Oid, pg_sys::Point }
argue_from_datum! { 'fcx; &'fcx str, &'fcx CStr, &'fcx [u8] }
// We could use the upcoming impl of ArgAbi for `&'fcx T where T: ?Sized + BorrowDatum`
// to support these types by implementing BorrowDatum for them also, but we reject this.
// It would greatly complicate other users of BorrowDatum like FlatArray, which want all impls
// of BorrowDatum to return a borrow of the entire pointee's len.
argue_from_datum! { 'fcx; &'fcx str, &'fcx [u8] }

unsafe impl<'fcx, T> ArgAbi<'fcx> for &'fcx T
where
T: ?Sized + BorrowDatum,
{
unsafe fn unbox_arg_unchecked(arg: Arg<'_, 'fcx>) -> &'fcx T {
let ptr: *mut u8 = match T::PASS {
PassBy::Ref => arg.2.value.cast_mut_ptr(),
PassBy::Value => ptr::addr_of!(arg.0.raw_args()[arg.1].value).cast_mut().cast(),
};
unsafe {
let ptr = ptr::NonNull::new_unchecked(ptr);
T::borrow_unchecked(ptr)
}
}

unsafe fn unbox_nullable_arg(arg: Arg<'_, 'fcx>) -> Nullable<Self> {
let ptr: Option<ptr::NonNull<u8>> = NonNull::new(match T::PASS {
PassBy::Ref => arg.2.value.cast_mut_ptr(),
PassBy::Value => ptr::addr_of!(arg.0.raw_args()[arg.1].value).cast_mut().cast(),
});
match (arg.is_null(), ptr) {
(true, _) | (false, None) => Nullable::Null,
(false, Some(ptr)) => unsafe { Nullable::Valid(T::borrow_unchecked(ptr)) },
}
}
}

/// How to return a value from Rust to Postgres
///
Expand Down
128 changes: 128 additions & 0 deletions pgrx/src/datum/borrow.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#![deny(unsafe_op_in_unsafe_fn)]
use super::*;
use crate::layout::PassBy;
use core::{ffi, mem, ptr};

/// Types which can be "borrowed from" [`&Datum<'_>`] via simple cast, deref, or slicing
///
/// # Safety
/// Despite its pleasant-sounding name, this implements a fairly low-level detail.
/// It exists to allow other code to use that nice-sounding BorrowDatum bound.
/// Outside of the pgrx library, it is probably incorrect to call and rely on this:
/// instead use convenience functions like `Datum::borrow_as`.
///
/// Its behavior is trusted for ABI details, and it should not be implemented if any doubt
/// exists of whether the type would be suitable for passing via Postgres.
pub unsafe trait BorrowDatum {
/// The "native" passing convention for this type.
///
/// - `PassBy::Value` implies [`mem::size_of<T>()`][size_of] <= [`mem::size_of::<Datum>()`][Datum].
/// - `PassBy::Ref` means the pointee will occupy at least 1 byte for variable-sized types.
///
/// Note that this means a zero-sized type is inappropriate for `BorrowDatum`.
const PASS: PassBy;

/// Cast a pointer to this blob of bytes to a pointer to this type.
///
/// This is not a simple `ptr.cast()` because it may be *unsizing*, which may require
/// reading varlena headers. For all fixed-size types, `ptr.cast()` should be correct.
///
/// # Safety
/// - This must be correctly invoked for the pointee type, as it may deref and read one or more
/// bytes in its implementation in order to read the inline metadata and unsize the type.
/// - This must be invoked with a pointee initialized for the dynamically specified length.
///
/// ## For Implementers
/// Reading the **first** byte pointed to is permitted if `T::PASS = PassBy::Ref`, assuming you
/// are implementing a varlena type. As the other dynamic length type, CStr also does this.
/// This function
/// - must NOT mutate the pointee
/// - must point to the entire datum's length (`size_of_val` must not lose bytes)
///
/// Do not attempt to handle pass-by-value versus pass-by-ref in this fn's body!
/// A caller may be in a context where all types are handled by-reference, for instance.
unsafe fn point_from(ptr: ptr::NonNull<u8>) -> ptr::NonNull<Self>;

/// Cast a pointer to aligned varlena headers to this type
///
/// This version allows you to assume the pointer is aligned to, and readable for, 4 bytes.
/// This optimization is not required. When in doubt, avoid implementing it, and rely on your
/// `point_from` implementation alone.
///
/// # Safety
/// - This must be correctly invoked for the pointee type, as it may deref.
/// - This must be 4-byte aligned!
unsafe fn point_from_align4(ptr: ptr::NonNull<u32>) -> ptr::NonNull<Self> {
debug_assert!(ptr.is_aligned());
unsafe { BorrowDatum::point_from(ptr.cast()) }
}

/// Optimization for borrowing the referent
unsafe fn borrow_unchecked<'dat>(ptr: ptr::NonNull<u8>) -> &'dat Self {
unsafe { BorrowDatum::point_from(ptr).as_ref() }
}
}

/// From a pointer to a Datum, obtain a pointer to T's bytes
///
/// This may be None if T is PassBy::Ref
///
/// # Safety
/// Assumes the Datum is init
pub(crate) unsafe fn datum_ptr_to_bytes<T>(ptr: ptr::NonNull<Datum<'_>>) -> Option<ptr::NonNull<u8>>
where
T: BorrowDatum,
{
match T::PASS {
// Ptr<Datum> casts to Ptr<T>
PassBy::Value => Some(ptr.cast()),
// Ptr<Datum> derefs to Datum which to Ptr
PassBy::Ref => unsafe {
let datum = ptr.read();
let ptr = ptr::NonNull::new(datum.sans_lifetime().cast_mut_ptr());
ptr
},
}
}

macro_rules! impl_borrow_fixed_len {
($($value_ty:ty),*) => {
$(
unsafe impl BorrowDatum for $value_ty {
const PASS: PassBy = if mem::size_of::<Self>() <= mem::size_of::<Datum>() {
PassBy::Value
} else {
PassBy::Ref
};

unsafe fn point_from(ptr: ptr::NonNull<u8>) -> ptr::NonNull<Self> {
ptr.cast()
}
}
)*
}
}

impl_borrow_fixed_len! {
i8, i16, i32, i64, bool, f32, f64,
pg_sys::Oid, pg_sys::Point,
Date, Time, Timestamp, TimestampWithTimeZone
}

/// It is rare to pass CStr via Datums, but not unheard of
unsafe impl BorrowDatum for ffi::CStr {
const PASS: PassBy = PassBy::Ref;

unsafe fn point_from(ptr: ptr::NonNull<u8>) -> ptr::NonNull<Self> {
let char_ptr: *mut ffi::c_char = ptr.as_ptr().cast();
unsafe {
let len = ffi::CStr::from_ptr(char_ptr).to_bytes_with_nul().len();
ptr::NonNull::new_unchecked(ptr::slice_from_raw_parts_mut(char_ptr, len) as *mut Self)
}
}

unsafe fn borrow_unchecked<'dat>(ptr: ptr::NonNull<u8>) -> &'dat Self {
let char_ptr: *const ffi::c_char = ptr.as_ptr().cast();
unsafe { ffi::CStr::from_ptr(char_ptr) }
}
}
13 changes: 12 additions & 1 deletion pgrx/src/datum/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
mod anyarray;
mod anyelement;
mod array;
mod borrow;
mod date;
pub mod datetime_support;
mod from;
Expand Down Expand Up @@ -44,6 +45,7 @@ pub use self::uuid::*;
pub use anyarray::*;
pub use anyelement::*;
pub use array::*;
pub use borrow::*;
pub use date::*;
pub use datetime_support::*;
pub use from::*;
Expand All @@ -63,6 +65,7 @@ pub use varlena::*;
use crate::memcx::MemCx;
use crate::pg_sys;
use core::marker::PhantomData;
use core::ptr;
#[doc(hidden)]
pub use with_typeid::nonstatic_typeid;
pub use with_typeid::{WithArrayTypeIds, WithSizedTypeIds, WithTypeIds, WithVarlenaTypeIds};
Expand Down Expand Up @@ -136,14 +139,22 @@ pub struct Datum<'src>(
);

impl<'src> Datum<'src> {
/// The Datum without its lifetime.
/// Strip a Datum of its lifetime for FFI purposes.
pub fn sans_lifetime(self) -> pg_sys::Datum {
self.0
}
/// Construct a Datum containing only a null pointer.
pub fn null() -> Datum<'src> {
Self(pg_sys::Datum::from(0), PhantomData)
}

/// Reborrow the Datum as `T`
///
/// If the type is `PassBy::Ref`, this may be `None`.
pub unsafe fn borrow_as<T: BorrowDatum>(&self) -> Option<&T> {
let ptr = ptr::NonNull::new_unchecked(ptr::from_ref(self).cast_mut());
borrow::datum_ptr_to_bytes::<T>(ptr).map(|ptr| BorrowDatum::borrow_unchecked(ptr))
}
}

/// A tagging trait to indicate a user type is also meant to be used by Postgres
Expand Down
2 changes: 1 addition & 1 deletion pgrx/src/layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pub(crate) struct Layout {
}

#[derive(Clone, Copy, Debug, PartialEq)]
pub(crate) enum PassBy {
pub enum PassBy {
Ref,
Value,
}
Expand Down
2 changes: 1 addition & 1 deletion pgrx/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ pub mod htup;
pub mod inoutfuncs;
pub mod itemptr;
pub mod iter;
pub mod layout;
pub mod list;
pub mod lwlock;
pub mod memcx;
Expand All @@ -79,7 +80,6 @@ pub mod wrappers;
pub mod xid;

/// Not ready for public exposure.
mod layout;
mod ptr;
mod slice;
mod toast;
Expand Down

0 comments on commit 901401c

Please sign in to comment.