Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 5daaf16

Browse files
committed Oct 12, 2024
chore: rename table metadata to config and merge SetConfig and DeleteConfig
1 parent ffd7c09 commit 5daaf16

File tree

11 files changed

+234
-244
lines changed

11 files changed

+234
-244
lines changed
 

‎protos/table.proto

+3-3
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ message Manifest {
8686
// * 2: move_stable_row_ids: row IDs are tracked and stable after move operations
8787
// (such as compaction), but not updates.
8888
// * 4: use v2 format (deprecated)
89-
// * 8: table metadata are present
89+
// * 8: table config is present
9090
uint64 reader_feature_flags = 9;
9191

9292
// Feature flags for writers.
@@ -140,12 +140,12 @@ message Manifest {
140140
// This specifies what format is used to store the data files.
141141
DataStorageFormat data_format = 15;
142142

143-
// Table metadata.
143+
// Table config.
144144
//
145145
// Keys with the prefix "lance." are reserved for the Lance library. Other
146146
// libraries may wish to similarly prefix their configuration keys
147147
// appropriately.
148-
map<string, string> table_metadata = 16;
148+
map<string, string> config = 16;
149149
} // Manifest
150150

151151
// Auxiliary Data attached to a version.

‎protos/transaction.proto

+7-13
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ message Transaction {
6161
repeated lance.file.Field schema = 2;
6262
// Schema metadata.
6363
map<string, bytes> schema_metadata = 3;
64-
// Table metadata to merge with existing table metadata.
65-
map<string, string> table_metadata = 4;
64+
// Key-value pairs to merge with existing config.
65+
map<string, string> config_upsert_values = 4;
6666
}
6767

6868
// Add or replace a new secondary index.
@@ -159,16 +159,11 @@ message Transaction {
159159
repeated DataFragment new_fragments = 3;
160160
}
161161

162-
// An operation that sets table metadata.
163-
message SetMetadata {
164-
map<string, string> metadata = 1;
162+
// An operation that updates the table config.
163+
message UpdateConfig {
164+
map<string, string> upsert_values = 1;
165+
repeated string delete_keys = 2;
165166
}
166-
167-
// An operation that deletes table metadata.
168-
message DeleteMetadata {
169-
repeated string metadata_keys = 1;
170-
}
171-
172167

173168
// The operation of this transaction.
174169
oneof operation {
@@ -182,7 +177,6 @@ message Transaction {
182177
ReserveFragments reserve_fragments = 107;
183178
Update update = 108;
184179
Project project = 109;
185-
SetMetadata set_metadata = 110;
186-
DeleteMetadata delete_metadata = 111;
180+
UpdateConfig update_config = 110;
187181
}
188182
}

‎python/src/dataset.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ impl Operation {
272272
let op = LanceOperation::Overwrite {
273273
fragments,
274274
schema,
275-
table_metadata: None,
275+
config_upsert_values: None,
276276
};
277277
Ok(Self(op))
278278
}

‎rust/lance-table/src/feature_flags.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ pub fn apply_feature_flags(manifest: &mut Manifest, enable_stable_row_id: bool)
5858
}
5959

6060
// Test whether any table config has been set
61-
if !manifest.table_metadata.is_empty() {
61+
if !manifest.config.is_empty() {
6262
manifest.reader_feature_flags |= FLAG_TABLE_METADATA;
6363
manifest.writer_feature_flags |= FLAG_TABLE_METADATA;
6464
}

‎rust/lance-table/src/format/manifest.rs

+22-22
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,8 @@ pub struct Manifest {
7979
/// The storage format of the data files.
8080
pub data_storage_format: DataStorageFormat,
8181

82-
/// Table metadata.
83-
pub table_metadata: HashMap<String, String>,
82+
/// Table configuration.
83+
pub config: HashMap<String, String>,
8484
}
8585

8686
fn compute_fragment_offsets(fragments: &[Fragment]) -> Vec<usize> {
@@ -120,7 +120,7 @@ impl Manifest {
120120
fragment_offsets,
121121
next_row_id: 0,
122122
data_storage_format,
123-
table_metadata: HashMap::new(),
123+
config: HashMap::new(),
124124
}
125125
}
126126

@@ -147,7 +147,7 @@ impl Manifest {
147147
fragment_offsets,
148148
next_row_id: previous.next_row_id,
149149
data_storage_format: previous.data_storage_format.clone(),
150-
table_metadata: previous.table_metadata.clone(),
150+
config: previous.config.clone(),
151151
}
152152
}
153153

@@ -167,15 +167,15 @@ impl Manifest {
167167
self.timestamp_nanos = nanos;
168168
}
169169

170-
/// Set the `table_metadata` from a metadata iterator
171-
pub fn set_metadata(&mut self, metadata: impl IntoIterator<Item = (String, String)>) {
172-
self.table_metadata.extend(metadata);
170+
/// Upsert key-value pairs from an iterator into `config` (existing keys are overwritten, other keys are kept)
171+
pub fn update_config(&mut self, upsert_values: impl IntoIterator<Item = (String, String)>) {
172+
self.config.extend(upsert_values);
173173
}
174174

175-
/// Delete `table_metadata` keys using a slice of metadata keys
176-
pub fn delete_metadata(&mut self, metadata_keys: &[&str]) {
177-
self.table_metadata
178-
.retain(|key, _| !metadata_keys.contains(&key.as_str()));
175+
/// Delete `config` keys using a slice of keys
176+
pub fn delete_config_keys(&mut self, delete_keys: &[&str]) {
177+
self.config
178+
.retain(|key, _| !delete_keys.contains(&key.as_str()));
179179
}
180180

181181
/// Check the current fragment list and update the high water mark
@@ -489,7 +489,7 @@ impl TryFrom<pb::Manifest> for Manifest {
489489
fragment_offsets,
490490
next_row_id: p.next_row_id,
491491
data_storage_format,
492-
table_metadata: p.table_metadata,
492+
config: p.config,
493493
})
494494
}
495495
}
@@ -532,7 +532,7 @@ impl From<&Manifest> for pb::Manifest {
532532
file_format: m.data_storage_format.file_format.clone(),
533533
version: m.data_storage_format.version.clone(),
534534
}),
535-
table_metadata: m.table_metadata.clone(),
535+
config: m.config.clone(),
536536
}
537537
}
538538
}
@@ -714,7 +714,7 @@ mod tests {
714714
}
715715

716716
#[test]
717-
fn test_table_metadata() {
717+
fn test_config() {
718718
let arrow_schema = ArrowSchema::new(vec![ArrowField::new(
719719
"a",
720720
arrow_schema::DataType::Int64,
@@ -728,15 +728,15 @@ mod tests {
728728
];
729729
let mut manifest = Manifest::new(schema, Arc::new(fragments), DataStorageFormat::default());
730730

731-
let mut metadata = HashMap::new();
732-
metadata.insert("lance:test".to_string(), "value".to_string());
733-
metadata.insert("other-key".to_string(), "other-value".to_string());
731+
let mut config = HashMap::new();
732+
config.insert("lance:test".to_string(), "value".to_string());
733+
config.insert("other-key".to_string(), "other-value".to_string());
734734

735-
manifest.set_metadata(metadata.clone());
736-
assert_eq!(manifest.table_metadata, metadata.clone());
735+
manifest.update_config(config.clone());
736+
assert_eq!(manifest.config, config.clone());
737737

738-
metadata.remove("other-key");
739-
manifest.delete_metadata(&["other-key"]);
740-
assert_eq!(manifest.table_metadata, metadata);
738+
config.remove("other-key");
739+
manifest.delete_config_keys(&["other-key"]);
740+
assert_eq!(manifest.config, config);
741741
}
742742
}

‎rust/lance-table/src/io/manifest.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -244,8 +244,8 @@ mod test {
244244
ArrowSchema::new(vec![ArrowField::new(long_name, DataType::Int64, false)]);
245245
let schema = Schema::try_from(&arrow_schema).unwrap();
246246

247-
let mut table_metadata = HashMap::new();
248-
table_metadata.insert("key".to_string(), "value".to_string());
247+
let mut config = HashMap::new();
248+
config.insert("key".to_string(), "value".to_string());
249249

250250
let mut manifest = Manifest::new(schema, Arc::new(vec![]), DataStorageFormat::default());
251251
let pos = write_manifest(&mut writer, &mut manifest, None)

‎rust/lance/src/dataset.rs

+22-20
Original file line numberDiff line numberDiff line change
@@ -593,7 +593,7 @@ impl Dataset {
593593
WriteMode::Create | WriteMode::Overwrite => Operation::Overwrite {
594594
schema,
595595
fragments,
596-
table_metadata: None,
596+
config_upsert_values: None,
597597
},
598598
WriteMode::Append => Operation::Append { fragments },
599599
};
@@ -1510,15 +1510,16 @@ impl Dataset {
15101510
self.merge_impl(stream, left_on, right_on).await
15111511
}
15121512

1513-
/// Set key-value pairs in table metadata.
1514-
pub async fn set_table_metadata(
1513+
/// Update key-value pairs in config.
1514+
pub async fn update_config(
15151515
&mut self,
1516-
metadata: impl IntoIterator<Item = (String, String)>,
1516+
upsert_values: impl IntoIterator<Item = (String, String)>,
15171517
) -> Result<()> {
15181518
let transaction = Transaction::new(
15191519
self.manifest.version,
1520-
Operation::SetMetadata {
1521-
table_metadata: HashMap::from_iter(metadata),
1520+
Operation::UpdateConfig {
1521+
upsert_values: Some(HashMap::from_iter(upsert_values)),
1522+
delete_keys: None,
15221523
},
15231524
None,
15241525
);
@@ -1539,12 +1540,13 @@ impl Dataset {
15391540
Ok(())
15401541
}
15411542

1542-
/// Delete keys from the table metadata.
1543-
pub async fn delete_table_metadata(&mut self, metadata_keys: &[&str]) -> Result<()> {
1543+
/// Delete keys from the config.
1544+
pub async fn delete_config_keys(&mut self, delete_keys: &[&str]) -> Result<()> {
15441545
let transaction = Transaction::new(
15451546
self.manifest.version,
1546-
Operation::DeleteMetadata {
1547-
table_metadata_keys: Vec::from_iter(metadata_keys.iter().map(ToString::to_string)),
1547+
Operation::UpdateConfig {
1548+
upsert_values: None,
1549+
delete_keys: Some(Vec::from_iter(delete_keys.iter().map(ToString::to_string))),
15481550
},
15491551
None,
15501552
);
@@ -2861,7 +2863,7 @@ mod tests {
28612863
let operation = Operation::Overwrite {
28622864
fragments: vec![],
28632865
schema,
2864-
table_metadata: None,
2866+
config_upsert_values: None,
28652867
};
28662868
let test_dir = tempdir().unwrap();
28672869
let test_uri = test_dir.path().to_str().unwrap();
@@ -3299,7 +3301,7 @@ mod tests {
32993301

33003302
#[rstest]
33013303
#[tokio::test]
3302-
async fn test_table_metadata() {
3304+
async fn test_update_config() {
33033305
// Create a table
33043306
let schema = Arc::new(ArrowSchema::new(vec![ArrowField::new(
33053307
"i",
@@ -3317,16 +3319,16 @@ mod tests {
33173319
let reader = RecordBatchIterator::new(vec![data.unwrap()].into_iter().map(Ok), schema);
33183320
let mut dataset = Dataset::write(reader, test_uri, None).await.unwrap();
33193321

3320-
let mut metadata = HashMap::new();
3321-
metadata.insert("lance:test".to_string(), "value".to_string());
3322-
metadata.insert("other-key".to_string(), "other-value".to_string());
3322+
let mut desired_config = HashMap::new();
3323+
desired_config.insert("lance:test".to_string(), "value".to_string());
3324+
desired_config.insert("other-key".to_string(), "other-value".to_string());
33233325

3324-
dataset.set_table_metadata(metadata.clone()).await.unwrap();
3325-
assert_eq!(dataset.manifest.table_metadata, metadata);
3326+
dataset.update_config(desired_config.clone()).await.unwrap();
3327+
assert_eq!(dataset.manifest.config, desired_config);
33263328

3327-
metadata.remove("other-key");
3328-
dataset.delete_table_metadata(&["other-key"]).await.unwrap();
3329-
assert_eq!(dataset.manifest.table_metadata, metadata);
3329+
desired_config.remove("other-key");
3330+
dataset.delete_config_keys(&["other-key"]).await.unwrap();
3331+
assert_eq!(dataset.manifest.config, desired_config);
33303332
}
33313333

33323334
#[rstest]

‎rust/lance/src/dataset/fragment.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -2357,7 +2357,7 @@ mod tests {
23572357
let op = Operation::Overwrite {
23582358
schema: schema.clone(),
23592359
fragments,
2360-
table_metadata: None,
2360+
config_upsert_values: None,
23612361
};
23622362

23632363
let registry = Arc::new(ObjectStoreRegistry::default());
@@ -2458,7 +2458,7 @@ mod tests {
24582458
let op = Operation::Overwrite {
24592459
fragments: vec![new_fragment],
24602460
schema: full_schema.clone(),
2461-
table_metadata: None,
2461+
config_upsert_values: None,
24622462
};
24632463

24642464
let registry = Arc::new(ObjectStoreRegistry::default());

‎rust/lance/src/dataset/transaction.rs

+166-172
Large diffs are not rendered by default.

‎rust/lance/src/io/commit.rs

+7-7
Original file line numberDiff line numberDiff line change
@@ -939,14 +939,14 @@ mod tests {
939939
async fn test_good_concurrent_config_writes() {
940940
let dataset = get_empty_dataset().await;
941941

942-
// Test successful concurrent set operations
942+
// Test successful concurrent insert config operations
943943
let futures: Vec<_> = ["key1", "key2", "key3", "key4", "key5"]
944944
.iter()
945945
.map(|key| {
946946
let mut dataset = dataset.clone();
947947
tokio::spawn(async move {
948948
dataset
949-
.set_table_metadata(vec![(key.to_string(), "value".to_string())])
949+
.update_config(vec![(key.to_string(), "value".to_string())])
950950
.await
951951
})
952952
})
@@ -959,7 +959,7 @@ mod tests {
959959
}
960960

961961
let dataset = dataset.checkout_version(6).await.unwrap();
962-
assert_eq!(dataset.manifest.table_metadata.len(), 5);
962+
assert_eq!(dataset.manifest.config.len(), 5);
963963

964964
dataset.validate().await.unwrap();
965965

@@ -969,7 +969,7 @@ mod tests {
969969
.iter()
970970
.map(|key| {
971971
let mut dataset = dataset.clone();
972-
tokio::spawn(async move { dataset.delete_table_metadata(&[key]).await })
972+
tokio::spawn(async move { dataset.delete_config_keys(&[key]).await })
973973
})
974974
.collect();
975975
let results = join_all(futures).await;
@@ -982,14 +982,14 @@ mod tests {
982982
let dataset = dataset.checkout_version(11).await.unwrap();
983983

984984
// There are now two fewer keys
985-
assert_eq!(dataset.manifest.table_metadata.len(), 3);
985+
assert_eq!(dataset.manifest.config.len(), 3);
986986

987987
dataset.validate().await.unwrap()
988988
}
989989

990990
#[tokio::test]
991991
async fn test_bad_concurrent_config_writes() {
992-
// If two concurrent set metadata operations occur for the same key, a
992+
// If two concurrent insert config operations occur for the same key, a
993993
// `CommitConflict` should be returned
994994
let dataset = get_empty_dataset().await;
995995

@@ -999,7 +999,7 @@ mod tests {
999999
let mut dataset = dataset.clone();
10001000
tokio::spawn(async move {
10011001
dataset
1002-
.set_table_metadata(vec![(key.to_string(), "value".to_string())])
1002+
.update_config(vec![(key.to_string(), "value".to_string())])
10031003
.await
10041004
})
10051005
})

‎rust/lance/src/utils/test.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ impl TestDatasetGenerator {
113113
let operation = Operation::Overwrite {
114114
fragments,
115115
schema,
116-
table_metadata: None,
116+
config_upsert_values: None,
117117
};
118118

119119
let registry = Arc::new(ObjectStoreRegistry::default());

0 commit comments

Comments
 (0)
Please sign in to comment.