|
| 1 | +// SPDX-License-Identifier: Apache-2.0 |
| 2 | +// SPDX-FileCopyrightText: Copyright The Lance Authors |
| 3 | + |
| 4 | +use arrow::array::AsArray; |
| 5 | +use arrow_array::RecordBatch; |
| 6 | +use arrow_schema::Field; |
| 7 | +use lance_arrow::RecordBatchExt; |
| 8 | +use lance_core::Error; |
| 9 | +use snafu::{location, Location}; |
| 10 | +use tracing::instrument; |
| 11 | + |
| 12 | +use crate::vector::transform::Transformer; |
| 13 | + |
| 14 | +use super::storage::FLAT_COLUMN; |
| 15 | + |
/// Transformer that re-labels a single input column of a record batch.
///
/// The only state it carries is the name of the column it operates on.
#[derive(Debug)]
pub struct FlatTransformer {
    /// Name of the column to look up in each incoming batch.
    input_column: String,
}

impl FlatTransformer {
    /// Builds a transformer for the column called `input_column`.
    ///
    /// Anything that can be viewed as a `&str` is accepted; the name is copied
    /// into an owned `String` so the transformer does not borrow from the caller.
    pub fn new(input_column: impl AsRef<str>) -> Self {
        let input_column = String::from(input_column.as_ref());
        Self { input_column }
    }
}
| 28 | + |
| 29 | +impl Transformer for FlatTransformer { |
| 30 | + #[instrument(name = "FlatTransformer::transform", level = "debug", skip_all)] |
| 31 | + fn transform(&self, batch: &RecordBatch) -> crate::Result<RecordBatch> { |
| 32 | + let input_arr = batch |
| 33 | + .column_by_name(&self.input_column) |
| 34 | + .ok_or(Error::Index { |
| 35 | + message: format!( |
| 36 | + "FlatTransform: column {} not found in batch", |
| 37 | + self.input_column |
| 38 | + ), |
| 39 | + location: location!(), |
| 40 | + })?; |
| 41 | + let field = Field::new( |
| 42 | + FLAT_COLUMN, |
| 43 | + input_arr.data_type().clone(), |
| 44 | + input_arr.is_nullable(), |
| 45 | + ); |
| 46 | + // rename the column to FLAT_COLUMN |
| 47 | + let batch = batch |
| 48 | + .drop_column(&self.input_column)? |
| 49 | + .try_with_column(field, input_arr.clone())?; |
| 50 | + Ok(batch) |
| 51 | + } |
| 52 | +} |
0 commit comments