Skip to content

Commit

Permalink
Added support for Tokenlist columns (#2144)
Browse files Browse the repository at this point in the history
  • Loading branch information
n-d-joshi authored Jan 27, 2025
1 parent e6a515b commit d3074fc
Show file tree
Hide file tree
Showing 12 changed files with 57 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,7 @@ private Schema avroType(
case PG_BYTEA:
case PROTO:
case TOKENLIST:
case PG_SPANNER_TOKENLIST:
return SchemaBuilder.builder().bytesType();
case TIMESTAMP:
case PG_TIMESTAMPTZ:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ public static String typeString(Type type, Integer size, boolean outputAsDdlRepr
return "JSON";
case PG_JSONB:
return "jsonb";
case PG_SPANNER_TOKENLIST:
return "spanner.tokenlist";
case PROTO:
if (outputAsDdlRepresentation) {
String quote = NameUtils.identifierQuote(Dialect.GOOGLE_STANDARD_SQL);
Expand Down Expand Up @@ -353,6 +355,9 @@ public static SizedType parseSpannerType(String spannerType, Dialect dialect) {
if (spannerType.equals("spanner.commit_timestamp")) {
return t(Type.pgSpannerCommitTimestamp(), null);
}
if (spannerType.equals("spanner.tokenlist")) {
return t(Type.pgSpannerTokenlist(), null);
}
break;
}
default:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ public final class Type implements Serializable {
new Type(Code.PG_ARRAY, TYPE_PG_TIMESTAMPTZ, null);
private static final Type TYPE_PG_ARRAY_DATE = new Type(Code.PG_ARRAY, TYPE_PG_DATE, null);

private static final Type TYPE_PG_SPANNER_TOKENLIST =
new Type(Code.PG_SPANNER_TOKENLIST, null, null);
private static final int AMBIGUOUS_FIELD = -1;
private static final long serialVersionUID = -3076152125004114582L;

Expand Down Expand Up @@ -209,6 +211,10 @@ public static Type pgSpannerCommitTimestamp() {
return TYPE_PG_SPANNER_COMMIT_TIMESTAMP;
}

/**
 * Returns the descriptor for the PostgreSQL-dialect {@code spanner.tokenlist} type.
 *
 * <p>Every call hands back the same preallocated {@code TYPE_PG_SPANNER_TOKENLIST} constant, which
 * is built from {@code Code.PG_SPANNER_TOKENLIST}.
 */
public static Type pgSpannerTokenlist() {
return TYPE_PG_SPANNER_TOKENLIST;
}

/** Returns a descriptor for an array of {@code elementType}. */
public static Type array(Type elementType) {
Preconditions.checkNotNull(elementType);
Expand Down Expand Up @@ -353,6 +359,7 @@ public enum Code {
PG_TIMESTAMPTZ("timestamp with time zone", Dialect.POSTGRESQL),
PG_DATE("date", Dialect.POSTGRESQL),
PG_SPANNER_COMMIT_TIMESTAMP("spanner.commit_timestamp", Dialect.POSTGRESQL),
PG_SPANNER_TOKENLIST("spanner.tokenlist", Dialect.POSTGRESQL),
PG_ARRAY("array", Dialect.POSTGRESQL);

private final String name;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,7 @@ public void prettyPrint(Appendable appendable) throws IOException {
appendable.append(" PLACEMENT KEY");
}
if (isHidden()) {
if (dialect() == Dialect.GOOGLE_STANDARD_SQL) {
appendable.append(" HIDDEN");
}
appendable.append(" HIDDEN");
}
if (columnOptions() == null) {
return;
Expand Down Expand Up @@ -347,6 +345,10 @@ public Builder pgJsonb() {
return type(Type.pgJsonb());
}

/**
 * Selects the PostgreSQL-dialect {@code spanner.tokenlist} type for the column being built.
 *
 * <p>Convenience shorthand for {@code type(Type.pgSpannerTokenlist())}.
 *
 * @return the result of {@code type(...)}; presumably this builder so calls can be chained
 *     (confirm against {@code type})
 */
public Builder pgSpannerTokenlist() {
return type(Type.pgSpannerTokenlist());
}

/**
 * Selects a proto type for the column being built.
 *
 * <p>Convenience shorthand for {@code type(Type.proto(protoTypeFqn))}.
 *
 * @param protoTypeFqn passed unchanged to {@code Type.proto}; presumably the fully qualified name
 *     of the proto message type (confirm against {@code Type.proto})
 * @return the result of {@code type(...)}; presumably this builder so calls can be chained
 *     (confirm against {@code type})
 */
public Builder proto(String protoTypeFqn) {
return type(Type.proto(protoTypeFqn));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,11 +360,14 @@ private void listColumns(Ddl.Builder builder) {
resultSet.isNull(13) ? null : Long.valueOf(resultSet.getString(13));
Long identitySkipRangeMax =
resultSet.isNull(14) ? null : Long.valueOf(resultSet.getString(14));
boolean isHidden = dialect == Dialect.GOOGLE_STANDARD_SQL ? resultSet.getBoolean(15) : false;
boolean isHidden =
dialect == Dialect.GOOGLE_STANDARD_SQL
? resultSet.getBoolean(15)
: resultSet.getString(15).equalsIgnoreCase("YES");
boolean isPlacementKey =
dialect == Dialect.GOOGLE_STANDARD_SQL
? resultSet.getBoolean(16)
: resultSet.getBoolean(15);
: resultSet.getBoolean(16);

builder
.createTable(tableName)
Expand Down Expand Up @@ -418,8 +421,8 @@ Statement listColumnsSQL() {
"SELECT c.table_schema, c.table_name, c.column_name,"
+ " c.ordinal_position, c.spanner_type, c.is_nullable,"
+ " c.is_generated, c.generation_expression, c.is_stored, c.column_default,"
+ " c.is_identity, c.identity_kind, c.identity_start_with_counter,"
+ " c.identity_skip_range_min, c.identity_skip_range_max,"
+ " c.is_identity, c.identity_kind, c.identity_start_with_counter, "
+ " c.identity_skip_range_min, c.identity_skip_range_max, c.is_hidden,"
+ " pkc.constraint_name IS NOT NULL AS is_placement_key"
+ " FROM information_schema.columns as c"
+ " LEFT JOIN placementkeycolumns AS pkc"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,9 @@ private static Type parseSpannerType(String spannerType, Dialect dialect) {
if ("SPANNER.COMMIT_TIMESTAMP".equals(spannerType)) {
return Type.timestamp();
}
if ("SPANNER.TOKENLIST".equals(spannerType)) {
return Type.bytes();
}
throw new IllegalArgumentException("Unknown spanner type " + spannerType);
default:
throw new IllegalArgumentException("Unrecognized dialect: " + dialect.name());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,14 @@ public void pgSimple() {
+ " \"type\" : [ \"null\", \"string\" ],"
+ " \"sqlType\" : \"spanner.commit_timestamp\""
+ " }, {"
+ " \"name\" : \"tokens\", "
+ " \"type\" : [\"null\"], "
+ " \"sqlType\" : \"spanner.tokenlist\","
+ " \"notNull\" : \"false\","
+ " \"generationExpression\" : \"spanner.tokenize_fulltext(first_name)\","
+ " \"stored\": \"true\","
+ " \"hidden\": \"true\""
+ " }, {"
+ " \"name\" : \"date\","
+ " \"type\" : [ \"null\", \"string\" ],"
+ " \"sqlType\" : \"date\""
Expand Down Expand Up @@ -477,6 +485,7 @@ public void pgSimple() {
+ " \"text\" text,"
+ " \"timestamptz\" timestamp with time zone,"
+ " \"commit_time\" spanner.commit_timestamp,"
+ " \"tokens\" spanner.tokenlist GENERATED ALWAYS AS (spanner.tokenize_fulltext(first_name)) STORED HIDDEN,"
+ " \"date\" date,"
+ " \"varcharArr1\" character varying[],"
+ " \"varcharArr2\" character varying[],"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,11 @@ public void pgSimple() {
.skipRangeMin(2000L)
.skipRangeMax(3000L)
.endColumn()
.column("tokens")
.pgSpannerTokenlist()
.generatedAs("(spanner.tokenize_fulltext(full_name))")
.isHidden(true)
.endColumn()
.primaryKey()
.asc("id")
.asc("gen_id")
Expand Down Expand Up @@ -407,7 +412,7 @@ public void pgSimple() {

List<Schema.Field> fields = avroSchema.getFields();

assertThat(fields, hasSize(7));
assertThat(fields, hasSize(8));

assertThat(fields.get(0).name(), equalTo("id"));
// Not null
Expand Down Expand Up @@ -472,6 +477,15 @@ public void pgSimple() {
assertThat(fields.get(6).getProp(SPANNER_SEQUENCE_SKIP_RANGE_MIN), equalTo("2000"));
assertThat(fields.get(6).getProp(SPANNER_SEQUENCE_SKIP_RANGE_MAX), equalTo("3000"));

assertThat(fields.get(7).name(), equalTo("tokens"));
assertThat(fields.get(7).schema(), equalTo(Schema.create(Schema.Type.NULL)));
assertThat(fields.get(7).getProp(SQL_TYPE), equalTo("spanner.tokenlist"));
assertThat(fields.get(7).getProp(NOT_NULL), equalTo("false"));
assertThat(
fields.get(7).getProp(GENERATION_EXPRESSION),
equalTo("(spanner.tokenize_fulltext(full_name))"));
assertThat(fields.get(7).getProp(HIDDEN), equalTo("true"));
assertThat(fields.get(7).getProp(DEFAULT_EXPRESSION), equalTo(null));
// spanner pk
assertThat(avroSchema.getProp(SPANNER_PRIMARY_KEY + "_0"), equalTo("\"id\" ASC"));
assertThat(avroSchema.getProp(SPANNER_PRIMARY_KEY + "_1"), equalTo("\"gen_id\" ASC"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ private void testPostgresSpannerToGCSAvroBase(
+ " \"FirstName\" character varying(256),\n"
+ " \"LastName\" character varying(256),\n"
+ " \"Rating\" real,\n"
+ " \"NameTokens\" spanner.tokenlist generated always as (spanner.tokenize_fulltext(\"FirstName\")) stored hidden,\n"
+ "PRIMARY KEY(\"Id\"))",
testName);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ private void testPostgresImportPipelineBase(
+ " \"Id\" bigint,\n"
+ " \"FirstName\" character varying(256),\n"
+ " \"LastName\" character varying(256),\n"
+ " \"NameTokens\" spanner.tokenlist generated always as (spanner.tokenize_fulltext(\"FirstName\")) stored hidden,\n"
+ "PRIMARY KEY(\"Id\"))";
spannerResourceManager.executeDdlStatement(createSingersTableStatement);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public void testListColumnsSQL() {
+ " SELECT c.table_schema, c.table_name, c.column_name, c.ordinal_position, c.spanner_type, c.is_nullable,"
+ " c.is_generated, c.generation_expression, c.is_stored, c.column_default,"
+ " c.is_identity, c.identity_kind, c.identity_start_with_counter,"
+ " c.identity_skip_range_min, c.identity_skip_range_max,"
+ " c.identity_skip_range_min, c.identity_skip_range_max, c.is_hidden,"
+ " pkc.constraint_name IS NOT NULL AS is_placement_key"
+ " FROM information_schema.columns as c"
+ " LEFT JOIN placementkeycolumns AS pkc"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,11 @@ public void testSinglePgTable() throws Exception {
.addColumn("test", "jsonbCol", "jsonb")
.addColumn("test", "arrayCol", "DOUBLE PRECISION[]")
.addColumn("test", "embeddingVectorCol", "DOUBLE PRECISION[] VECTOR LENGTH 16")
.addColumn("test", "tokens", "spanner.tokenlist")
.build();

assertEquals(1, schema.getTables().size());
assertEquals(7, schema.getColumns("test").size());
assertEquals(8, schema.getColumns("test").size());
assertEquals(1, schema.getKeyParts("test").size());
assertEquals(Type.timestamp(), schema.getColumns("test").get(3).getType());
assertEquals(Type.array(Type.float64()), schema.getColumns("test").get(5).getType());
Expand Down

0 comments on commit d3074fc

Please sign in to comment.