Skip to content

Commit a20c35b

Browse files
authored
Merge branch 'main' into benliepert/update_ndarray
2 parents 65276b0 + 81d3e0b commit a20c35b

27 files changed

+760
-566
lines changed

crates/store/re_chunk_store/src/dataframe.rs

+270-3
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use arrow2::{
99
};
1010
use itertools::Itertools as _;
1111

12-
use re_chunk::LatestAtQuery;
12+
use re_chunk::{LatestAtQuery, TimelineName};
1313
use re_log_types::{EntityPath, TimeInt, Timeline};
1414
use re_log_types::{EntityPathFilter, ResolvedTimeRange};
1515
use re_types_core::{ArchetypeName, ComponentName, Loggable as _};
@@ -27,7 +27,7 @@ use crate::RowId;
2727
/// Because range-queries often involve repeating the same joined-in data multiple times,
2828
/// the strategy we choose for joining can have a significant impact on the size and memory
2929
/// overhead of the `RecordBatch`.
30-
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
30+
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
3131
pub enum JoinEncoding {
3232
/// Slice the `RecordBatch` to minimal overlapping sub-ranges.
3333
///
@@ -398,6 +398,181 @@ impl ComponentColumnDescriptor {
398398
}
399399
}
400400

401+
// --- Selectors ---
402+
403+
/// Describes a column selection to return as part of a query.
///
/// Each variant wraps the selector type for one kind of column.
404+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
405+
pub enum ColumnSelector {
406+
/// Selects a control column, see [`ControlColumnSelector`].
Control(ControlColumnSelector),
407+
/// Selects a time column, see [`TimeColumnSelector`].
Time(TimeColumnSelector),
408+
/// Selects a component column, see [`ComponentColumnSelector`].
Component(ComponentColumnSelector),
409+
//TODO(jleibs): Add support for archetype-based component selection.
410+
//ArchetypeField(ArchetypeFieldColumnSelector),
411+
}
412+
413+
impl From<ColumnDescriptor> for ColumnSelector {
414+
#[inline]
415+
fn from(desc: ColumnDescriptor) -> Self {
416+
match desc {
417+
ColumnDescriptor::Control(desc) => Self::Control(desc.into()),
418+
ColumnDescriptor::Time(desc) => Self::Time(desc.into()),
419+
ColumnDescriptor::Component(desc) => Self::Component(desc.into()),
420+
}
421+
}
422+
}
423+
424+
impl From<ControlColumnSelector> for ColumnSelector {
425+
#[inline]
426+
fn from(desc: ControlColumnSelector) -> Self {
427+
Self::Control(desc)
428+
}
429+
}
430+
431+
impl From<TimeColumnSelector> for ColumnSelector {
432+
#[inline]
433+
fn from(desc: TimeColumnSelector) -> Self {
434+
Self::Time(desc)
435+
}
436+
}
437+
438+
impl From<ComponentColumnSelector> for ColumnSelector {
439+
#[inline]
440+
fn from(desc: ComponentColumnSelector) -> Self {
441+
Self::Component(desc)
442+
}
443+
}
444+
445+
/// Select a control column.
446+
///
447+
/// The only control column currently supported is `rerun.components.RowId`.
/// Use [`ControlColumnSelector::row_id`] to build the selector for it.
448+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
449+
pub struct ControlColumnSelector {
450+
/// Name of the control column.
451+
pub component: ComponentName,
452+
}
453+
454+
impl ControlColumnSelector {
455+
#[inline]
456+
pub fn row_id() -> Self {
457+
Self {
458+
component: RowId::name(),
459+
}
460+
}
461+
}
462+
463+
impl From<ControlColumnDescriptor> for ControlColumnSelector {
    #[inline]
    fn from(desc: ControlColumnDescriptor) -> Self {
        // Only the component name is carried over into the selector.
        let component = desc.component_name;
        Self { component }
    }
}
471+
472+
/// Select a time column.
///
/// The column is identified solely by the name of its timeline.
473+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
474+
pub struct TimeColumnSelector {
475+
/// The name of the timeline.
476+
pub timeline: TimelineName,
477+
}
478+
479+
impl From<TimeColumnDescriptor> for TimeColumnSelector {
    #[inline]
    fn from(desc: TimeColumnDescriptor) -> Self {
        // The selector keeps just the name of the descriptor's timeline.
        let timeline = *desc.timeline.name();
        Self { timeline }
    }
}
487+
488+
/// Select a component based on its `EntityPath` and `ComponentName`.
489+
///
490+
/// Note that in the future, when Rerun supports duplicate tagged components
491+
/// on the same entity, this selector may be ambiguous. In this case, the
492+
/// query result will return an error if it cannot determine a single selected
493+
/// component.
494+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
495+
pub struct ComponentColumnSelector {
496+
/// The path of the entity.
497+
pub entity_path: EntityPath,
498+
499+
/// Semantic name associated with this data.
500+
pub component: ComponentName,
501+
502+
/// How to join the data into the `RecordBatch`.
503+
pub join_encoding: JoinEncoding,
504+
}
505+
506+
/// Builds a selector from a descriptor, keeping its entity path, component name and
/// join encoding.
impl From<ComponentColumnDescriptor> for ComponentColumnSelector {
    #[inline]
    fn from(desc: ComponentColumnDescriptor) -> Self {
        Self {
            // `desc` is owned, so the entity path can be moved out rather than cloned.
            entity_path: desc.entity_path,
            component: desc.component_name,
            join_encoding: desc.join_encoding,
        }
    }
}
516+
517+
impl ComponentColumnSelector {
518+
/// Select a component of a given type, based on its [`EntityPath`]
519+
#[inline]
520+
pub fn new<C: re_types_core::Component>(entity_path: EntityPath) -> Self {
521+
Self {
522+
entity_path,
523+
component: C::name(),
524+
join_encoding: JoinEncoding::default(),
525+
}
526+
}
527+
528+
/// Select a component based on its [`EntityPath`] and [`ComponentName`].
529+
#[inline]
530+
pub fn new_for_component_name(entity_path: EntityPath, component: ComponentName) -> Self {
531+
Self {
532+
entity_path,
533+
component,
534+
join_encoding: JoinEncoding::default(),
535+
}
536+
}
537+
538+
/// Specify how the data should be joined into the `RecordBatch`.
539+
#[inline]
540+
pub fn with_join_encoding(mut self, join_encoding: JoinEncoding) -> Self {
541+
self.join_encoding = join_encoding;
542+
self
543+
}
544+
}
545+
546+
impl std::fmt::Display for ComponentColumnSelector {
547+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
548+
let Self {
549+
entity_path,
550+
component,
551+
join_encoding: _,
552+
} = self;
553+
554+
f.write_fmt(format_args!("{entity_path}@{}", component.short_name()))
555+
}
556+
}
557+
558+
// TODO(jleibs): Add support for archetype-based column selection.
559+
/*
560+
/// Select a component based on its `Archetype` and field.
561+
pub struct ArchetypeFieldColumnSelector {
562+
/// The path of the entity.
563+
entity_path: EntityPath,
564+
565+
/// Name of the `Archetype` associated with this data.
566+
archetype: ArchetypeName,
567+
568+
/// The field within the `Archetype` associated with this data.
569+
field: String,
570+
571+
/// How to join the data into the `RecordBatch`.
572+
join_encoding: JoinEncoding,
573+
}
574+
*/
575+
401576
// --- Queries ---
402577

403578
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -501,7 +676,7 @@ pub struct RangeQueryExpression {
501676
/// multiple rows at a given timestamp.
502677
//
503678
// TODO(cmc): issue for multi-pov support
504-
pub pov: ComponentColumnDescriptor,
679+
pub pov: ComponentColumnSelector,
505680
//
506681
// TODO(cmc): custom join policy support
507682
}
@@ -622,6 +797,98 @@ impl ChunkStore {
622797
controls.chain(timelines).chain(components).collect()
623798
}
624799

800+
/// Given a [`ControlColumnSelector`], returns the corresponding [`ControlColumnDescriptor`].
801+
#[allow(clippy::unused_self)]
802+
pub fn resolve_control_selector(
803+
&self,
804+
selector: &ControlColumnSelector,
805+
) -> ControlColumnDescriptor {
806+
if selector.component == RowId::name() {
807+
ControlColumnDescriptor {
808+
component_name: selector.component,
809+
datatype: RowId::arrow_datatype(),
810+
}
811+
} else {
812+
ControlColumnDescriptor {
813+
component_name: selector.component,
814+
datatype: ArrowDatatype::Null,
815+
}
816+
}
817+
}
818+
819+
/// Given a [`TimeColumnSelector`], returns the corresponding [`TimeColumnDescriptor`].
820+
pub fn resolve_time_selector(&self, selector: &TimeColumnSelector) -> TimeColumnDescriptor {
821+
let timelines = self.all_timelines();
822+
823+
let timeline = timelines
824+
.iter()
825+
.find(|timeline| timeline.name() == &selector.timeline)
826+
.copied()
827+
.unwrap_or_else(|| Timeline::new_temporal(selector.timeline));
828+
829+
TimeColumnDescriptor {
830+
timeline,
831+
datatype: timeline.datatype(),
832+
}
833+
}
834+
835+
/// Given a [`ComponentColumnSelector`], returns the corresponding [`ComponentColumnDescriptor`].
836+
///
837+
/// If the component is not found in the store, a default descriptor is returned with a null datatype.
838+
pub fn resolve_component_selector(
839+
&self,
840+
selector: &ComponentColumnSelector,
841+
) -> ComponentColumnDescriptor {
842+
let datatype = self
843+
.lookup_datatype(&selector.component)
844+
.cloned()
845+
.unwrap_or_else(|| ArrowDatatype::Null);
846+
847+
let is_static = self
848+
.static_chunk_ids_per_entity
849+
.get(&selector.entity_path)
850+
.map_or(false, |per_component| {
851+
per_component.contains_key(&selector.component)
852+
});
853+
854+
// TODO(#6889): Fill `archetype_name`/`archetype_field_name` (or whatever their
855+
// final name ends up being) once we generate tags.
856+
ComponentColumnDescriptor {
857+
entity_path: selector.entity_path.clone(),
858+
archetype_name: None,
859+
archetype_field_name: None,
860+
component_name: selector.component,
861+
store_datatype: ArrowListArray::<i32>::default_datatype(datatype.clone()),
862+
join_encoding: selector.join_encoding,
863+
is_static,
864+
}
865+
}
866+
867+
/// Given a set of [`ColumnSelector`]s, returns the corresponding [`ColumnDescriptor`]s.
868+
pub fn resolve_selectors(
869+
&self,
870+
selectors: impl IntoIterator<Item = impl Into<ColumnSelector>>,
871+
) -> Vec<ColumnDescriptor> {
872+
// TODO(jleibs): When, if ever, should this return an error?
873+
selectors
874+
.into_iter()
875+
.map(|selector| {
876+
let selector = selector.into();
877+
match selector {
878+
ColumnSelector::Control(selector) => {
879+
ColumnDescriptor::Control(self.resolve_control_selector(&selector))
880+
}
881+
ColumnSelector::Time(selector) => {
882+
ColumnDescriptor::Time(self.resolve_time_selector(&selector))
883+
}
884+
ColumnSelector::Component(selector) => {
885+
ColumnDescriptor::Component(self.resolve_component_selector(&selector))
886+
}
887+
}
888+
})
889+
.collect()
890+
}
891+
625892
/// Returns the filtered schema for the given query expression.
626893
///
627894
/// This will only include columns which may contain non-empty values from the perspective of

crates/store/re_chunk_store/src/lib.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@ mod subscribers;
2424
mod writes;
2525

2626
pub use self::dataframe::{
27-
ColumnDescriptor, ComponentColumnDescriptor, ControlColumnDescriptor, JoinEncoding,
28-
LatestAtQueryExpression, QueryExpression, RangeQueryExpression, TimeColumnDescriptor,
27+
ColumnDescriptor, ColumnSelector, ComponentColumnDescriptor, ComponentColumnSelector,
28+
ControlColumnDescriptor, ControlColumnSelector, JoinEncoding, LatestAtQueryExpression,
29+
QueryExpression, RangeQueryExpression, TimeColumnDescriptor, TimeColumnSelector,
2930
};
3031
pub use self::events::{ChunkStoreDiff, ChunkStoreDiffKind, ChunkStoreEvent};
3132
pub use self::gc::{GarbageCollectionOptions, GarbageCollectionTarget};

crates/store/re_dataframe/examples/range.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use itertools::Itertools as _;
22

33
use re_chunk_store::{
4-
ChunkStore, ChunkStoreConfig, ComponentColumnDescriptor, RangeQueryExpression, Timeline,
4+
ChunkStore, ChunkStoreConfig, ComponentColumnSelector, RangeQueryExpression, Timeline,
55
VersionPolicy,
66
};
77
use re_dataframe::QueryEngine;
@@ -46,7 +46,7 @@ fn main() -> anyhow::Result<()> {
4646
entity_path_filter: entity_path_filter.clone(),
4747
timeline: Timeline::log_tick(),
4848
time_range: ResolvedTimeRange::new(0, 30),
49-
pov: ComponentColumnDescriptor::new::<re_types::components::Position3D>(
49+
pov: ComponentColumnSelector::new::<re_types::components::Position3D>(
5050
entity_path_pov.into(),
5151
),
5252
};

crates/store/re_dataframe/examples/range_paginated.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use itertools::Itertools as _;
44

55
use re_chunk_store::{
6-
ChunkStore, ChunkStoreConfig, ComponentColumnDescriptor, RangeQueryExpression, Timeline,
6+
ChunkStore, ChunkStoreConfig, ComponentColumnSelector, RangeQueryExpression, Timeline,
77
VersionPolicy,
88
};
99
use re_dataframe::{QueryEngine, RecordBatch};
@@ -48,7 +48,7 @@ fn main() -> anyhow::Result<()> {
4848
entity_path_filter: entity_path_filter.clone(),
4949
timeline: Timeline::log_tick(),
5050
time_range: ResolvedTimeRange::new(0, 30),
51-
pov: ComponentColumnDescriptor::new::<re_types::components::Position3D>(
51+
pov: ComponentColumnSelector::new::<re_types::components::Position3D>(
5252
entity_path_pov.into(),
5353
),
5454
};

crates/store/re_dataframe/src/engine.rs

+7-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use re_chunk::TransportChunk;
22
use re_chunk_store::{
3-
ChunkStore, ColumnDescriptor, LatestAtQueryExpression, QueryExpression, RangeQueryExpression,
3+
ChunkStore, ColumnDescriptor, ColumnSelector, LatestAtQueryExpression, QueryExpression,
4+
RangeQueryExpression,
45
};
56
use re_query::Caches;
67

@@ -98,7 +99,7 @@ impl QueryEngine<'_> {
9899
pub fn query(
99100
&self,
100101
query: &QueryExpression,
101-
columns: Option<Vec<ColumnDescriptor>>,
102+
columns: Option<Vec<ColumnSelector>>,
102103
) -> QueryHandle<'_> {
103104
match query {
104105
QueryExpression::LatestAt(query) => self.latest_at(query, columns).into(),
@@ -133,8 +134,9 @@ impl QueryEngine<'_> {
133134
pub fn latest_at(
134135
&self,
135136
query: &LatestAtQueryExpression,
136-
columns: Option<Vec<ColumnDescriptor>>,
137+
columns: Option<Vec<ColumnSelector>>,
137138
) -> LatestAtQueryHandle<'_> {
139+
let columns = columns.map(|selectors| self.store.resolve_selectors(selectors));
138140
LatestAtQueryHandle::new(self, query.clone(), columns)
139141
}
140142

@@ -165,8 +167,9 @@ impl QueryEngine<'_> {
165167
pub fn range(
166168
&self,
167169
query: &RangeQueryExpression,
168-
columns: Option<Vec<ColumnDescriptor>>,
170+
columns: Option<Vec<ColumnSelector>>,
169171
) -> RangeQueryHandle<'_> {
172+
let columns = columns.map(|selectors| self.store.resolve_selectors(selectors));
170173
RangeQueryHandle::new(self, query.clone(), columns)
171174
}
172175
}

0 commit comments

Comments
 (0)