-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathexample.cpp
61 lines (52 loc) · 1.85 KB
/
example.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#include <cstddef>
#include <iostream>
#include <random>
#include <string>
#include <vector>

#include <pgvector/pqxx.hpp>
#include <pqxx/pqxx>
/// Bulk-loading example: generates random embeddings, streams them into the
/// `items` table of the `pgvector_example` database with pqxx::stream_to, and
/// finally runs ANALYZE on the table.
///
/// The `items` table is dropped and recreated on every run. Index creation is
/// included for reference but disabled via `create_index`.
///
/// @return 0 after all statements have been issued. Exceptions thrown by the
///         database calls are not caught here.
int main() {
    // generate random data
    const int rows = 100000;
    const int dimensions = 128;
    std::vector<std::vector<float>> embeddings;
    embeddings.reserve(rows);
    std::mt19937_64 prng;  // default-seeded, so every run produces the same values
    std::uniform_real_distribution<float> dist(0, 1);
    for (int i = 0; i < rows; i++) {
        // Build each row directly inside the outer vector so it is never
        // copied after being filled.
        std::vector<float>& embedding = embeddings.emplace_back();
        embedding.reserve(dimensions);
        for (int j = 0; j < dimensions; j++) {
            embedding.push_back(dist(prng));
        }
    }

    // enable extension
    pqxx::connection conn("dbname=pgvector_example");
    pqxx::nontransaction tx(conn);
    tx.exec("CREATE EXTENSION IF NOT EXISTS vector");

    // create table
    // The column width comes from `dimensions` so the schema cannot drift
    // from the size of the generated vectors.
    tx.exec("DROP TABLE IF EXISTS items");
    tx.exec("CREATE TABLE items (id bigserial, embedding vector(" + std::to_string(dimensions) + "))");

    // load data
    // libpqxx does not support binary COPY
    std::cout << "Loading " << rows << " rows" << std::endl;
    auto stream = pqxx::stream_to::table(tx, {"items"}, {"embedding"});
    std::size_t loaded = 0;
    for (const auto& embedding : embeddings) {
        // show progress: one dot per 10000 rows, flushed so it appears immediately
        if (loaded % 10000 == 0) {
            std::cout << '.' << std::flush;
        }
        stream << pgvector::Vector(embedding);
        ++loaded;
    }
    stream.complete();
    std::cout << std::endl << "Success!" << std::endl;

    // create any indexes *after* loading initial data (skipping for this example)
    const bool create_index = false;
    if (create_index) {
        std::cout << "Creating index" << std::endl;
        tx.exec("SET maintenance_work_mem = '8GB'");
        tx.exec("SET max_parallel_maintenance_workers = 7");
        tx.exec("CREATE INDEX ON items USING hnsw (embedding vector_cosine_ops)");
    }

    // update planner statistics for good measure
    tx.exec("ANALYZE items");
    return 0;
}