Skip to content

Commit 40ba433

Browse files
authored
feat: store IVF in global buffer (lancedb#2449)
1 parent 0a1944f commit 40ba433

File tree

6 files changed

+28
-22
lines changed

6 files changed

+28
-22
lines changed

Cargo.toml

-1
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,6 @@ half = { "version" = "2.4.1", default-features = false, features = [
7979
"num-traits",
8080
"std",
8181
] }
82-
hex = "0.4"
8382
bitvec = "1"
8483
bytes = "1.4"
8584
byteorder = "1.5"

rust/lance-index/Cargo.toml

-1
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@ datafusion.workspace = true
2828
deepsize.workspace = true
2929
futures.workspace = true
3030
half.workspace = true
31-
hex.workspace = true
3231
itertools.workspace = true
3332
lance-arrow.workspace = true
3433
lance-core.workspace = true

rust/lance-index/src/vector/storage.rs

+8-4
Original file line number | Diff line number | Diff line change
@@ -172,16 +172,20 @@ impl<Q: Quantization> IvfQuantizationStorage<Q> {
172172
.as_str(),
173173
)?;
174174

175-
let ivf_pb_bytes =
176-
hex::decode(schema.metadata.get(IVF_METADATA_KEY).ok_or(Error::Index {
175+
let ivf_pos = schema
176+
.metadata
177+
.get(IVF_METADATA_KEY)
178+
.ok_or(Error::Index {
177179
message: format!("{} not found", IVF_METADATA_KEY),
178180
location: location!(),
179-
})?)
181+
})?
182+
.parse()
180183
.map_err(|e| Error::Index {
181184
message: format!("Failed to decode IVF metadata: {}", e),
182185
location: location!(),
183186
})?;
184-
let ivf = IvfData::try_from(pb::Ivf::decode(ivf_pb_bytes.as_ref())?)?;
187+
let ivf_bytes = reader.read_global_buffer(ivf_pos).await?;
188+
let ivf = IvfData::try_from(pb::Ivf::decode(ivf_bytes)?)?;
185189

186190
let quantizer_metadata: Q::Metadata = serde_json::from_str(
187191
schema

rust/lance/Cargo.toml

-1
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,6 @@ dashmap = "5"
4444
deepsize.workspace = true
4545
# matches arrow-rs use
4646
half.workspace = true
47-
hex.workspace = true
4847
itertools.workspace = true
4948
object_store = { workspace = true, features = ["aws", "gcp", "azure"] }
5049
aws-credential-types.workspace = true

rust/lance/src/index/vector/builder.rs

+8-6
Original file line number | Diff line number | Diff line change
@@ -303,19 +303,21 @@ impl<S: IvfSubIndex, Q: Quantization + Clone> IvfIndexBuilder<S, Q> {
303303
let mut storage_writer = storage_writer.unwrap();
304304
let storage_ivf_pb = pb::Ivf::try_from(&storage_ivf)?;
305305
storage_writer.add_schema_metadata(DISTANCE_TYPE_KEY, self.distance_type.to_string());
306-
storage_writer.add_schema_metadata(
307-
IVF_METADATA_KEY,
308-
hex::encode(storage_ivf_pb.encode_to_vec()),
309-
);
306+
let ivf_buffer_pos = storage_writer
307+
.add_global_buffer(storage_ivf_pb.encode_to_vec().into())
308+
.await?;
309+
storage_writer.add_schema_metadata(IVF_METADATA_KEY, ivf_buffer_pos.to_string());
310310
storage_writer.add_schema_metadata(
311311
Q::metadata_key(),
312312
self.quantizer.metadata(None)?.to_string(),
313313
);
314314

315315
let index_ivf_pb = pb::Ivf::try_from(&index_ivf)?;
316316
index_writer.add_schema_metadata(DISTANCE_TYPE_KEY, self.distance_type.to_string());
317-
index_writer
318-
.add_schema_metadata(IVF_METADATA_KEY, hex::encode(index_ivf_pb.encode_to_vec()));
317+
let ivf_buffer_pos = index_writer
318+
.add_global_buffer(index_ivf_pb.encode_to_vec().into())
319+
.await?;
320+
index_writer.add_schema_metadata(IVF_METADATA_KEY, ivf_buffer_pos.to_string());
319321

320322
storage_writer.finish().await?;
321323
index_writer.finish().await?;

rust/lance/src/index/vector/ivf/v2.rs

+12-9
Original file line number | Diff line number | Diff line change
@@ -107,18 +107,21 @@ impl<I: IvfSubIndex + 'static, Q: Quantization> IVFIndex<I, Q> {
107107
.as_str(),
108108
)?;
109109

110-
let ivf_pb_bytes =
111-
hex::decode(index_reader.schema().metadata.get(IVF_METADATA_KEY).ok_or(
112-
Error::Index {
113-
message: format!("{} not found", IVF_METADATA_KEY),
114-
location: location!(),
115-
},
116-
)?)
110+
let ivf_pos = index_reader
111+
.schema()
112+
.metadata
113+
.get(IVF_METADATA_KEY)
114+
.ok_or(Error::Index {
115+
message: format!("{} not found", IVF_METADATA_KEY),
116+
location: location!(),
117+
})?
118+
.parse()
117119
.map_err(|e| Error::Index {
118-
message: format!("Failed to decode IVF metadata: {}", e),
120+
message: format!("Failed to decode IVF position: {}", e),
119121
location: location!(),
120122
})?;
121-
let ivf = Ivf::try_from(&pb::Ivf::decode(ivf_pb_bytes.as_ref())?)?;
123+
let ivf_pb_bytes = index_reader.read_global_buffer(ivf_pos).await?;
124+
let ivf = Ivf::try_from(&pb::Ivf::decode(ivf_pb_bytes)?)?;
122125

123126
let storage_reader = FileReader::try_open(
124127
scheduler

0 commit comments

Comments
 (0)