Skip to content

Commit 1f31516

Browse files
jirassimokfindepi
authored and committed
Fix reading invalid Avro schema written by Avro 1.8.2
Certain schemas written by Avro 1.8.2 were invalid and rejected by Avro 1.9.2. Upgrading to Avro 1.10.2 unlocks the ability to read these schemas (by disabling validation). This required an additional code change in `trino-hive-apache`.
1 parent 3dc90ce commit 1f31516

File tree

4 files changed

+123
-1
lines changed

4 files changed

+123
-1
lines changed

pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,7 @@
554554
<dependency>
555555
<groupId>io.trino.hive</groupId>
556556
<artifactId>hive-apache</artifactId>
557-
<version>3.1.2-16</version>
557+
<version>3.1.2-17</version>
558558
</dependency>
559559

560560
<dependency>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"type": "record",
3+
"name": "invalid_union_default",
4+
"namespace": "io.trino.test.avroschema",
5+
"doc": "This schema has columns with illegal null defaults. These should be illegal and are rejected by Avro 1.9.2, but are allowed by Avro 1.8.2 and can be read by Avro 1.10.2.",
6+
"fields": [
7+
{
8+
"name": "valid",
9+
"doc": "A union with a legal null default",
10+
"default": null,
11+
"type": ["null", "string"]
12+
},
13+
{
14+
"name": "invalid",
15+
"doc": "Invalid: the default of a union should correspond to the first type",
16+
"default": null,
17+
"type": ["string", "null"]
18+
}
19+
]
20+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package io.trino.tests.product.hive;
15+
16+
import io.trino.tempto.ProductTest;
17+
import io.trino.tempto.hadoop.hdfs.HdfsClient;
18+
import org.testng.annotations.Test;
19+
20+
import javax.inject.Inject;
21+
22+
import java.io.IOException;
23+
import java.io.InputStream;
24+
import java.nio.file.Path;
25+
26+
import static io.trino.tempto.assertions.QueryAssert.Row.row;
27+
import static io.trino.tempto.assertions.QueryAssert.assertThat;
28+
import static io.trino.tempto.query.QueryExecutor.param;
29+
import static io.trino.tempto.query.QueryExecutor.query;
30+
import static io.trino.tests.product.TestGroups.AVRO;
31+
import static io.trino.tests.product.utils.QueryExecutors.onTrino;
32+
import static java.lang.String.format;
33+
import static java.nio.file.Files.newInputStream;
34+
import static java.sql.JDBCType.VARCHAR;
35+
36+
/**
37+
* Avro 1.8.2 was not strict about schema validation, causing the more-strict
38+
* Avro 1.9.2 to cause failures with some schemas that previously worked. This
39+
* was resolved in Avro 1.10.2's reader.
40+
*
41+
* <p>This class tests that Trino can read some data written with invalid
42+
* schemas by Avro 1.8.2.
43+
*/
44+
public class TestAvroSchemaStrictness
45+
extends ProductTest
46+
{
47+
private static final Path ILLEGAL_UNION_DEFAULT_SCHEMA = Path.of("/docker/presto-product-tests/avro/invalid_default.avsc");
48+
/**
49+
* The data in the avro data file was generated from the following JSON data using avro-tools.
50+
* <pre>
51+
* {"valid": {"string": "valid"}, "invalid": {"string": "invalid"}}
52+
* {"valid": null, "invalid": null}
53+
* </pre>
54+
*
55+
* The command used:
56+
* <pre>{@code
57+
* java.jar avro-tools-1.8.2.jar fromjson --schema-file "$SCHEMA_FILE" data.json > invalid_default.avro
58+
* }</pre>
59+
*/
60+
private static final Path ILLEGAL_UNION_DEFAULT_DATA = Path.of("/docker/presto-product-tests/avro/invalid_default.avro");
61+
62+
@Inject
63+
private HdfsClient hdfsClient;
64+
65+
@Test(groups = AVRO)
66+
public void testInvalidUnionDefaults()
67+
throws IOException
68+
{
69+
String tableName = "invalid_union_default";
70+
String tablePath = format("/tmp/%s", tableName);
71+
String schemaPath = format("/tmp/%s.avsc", tableName);
72+
73+
hdfsClient.createDirectory(tablePath);
74+
copyToHdfs(ILLEGAL_UNION_DEFAULT_DATA, Path.of(tablePath, "data.avro").toString());
75+
copyToHdfs(ILLEGAL_UNION_DEFAULT_SCHEMA, schemaPath);
76+
77+
onTrino().executeQuery(
78+
format("CREATE TABLE %s (x int) with (\n"
79+
+ "format = 'AVRO',\n"
80+
+ "avro_schema_url = ?,\n"
81+
+ "external_location = ?\n"
82+
+ ")", tableName),
83+
param(VARCHAR, schemaPath),
84+
param(VARCHAR, tablePath));
85+
86+
assertThat(query("SELECT valid, invalid FROM " + tableName))
87+
.containsOnly(row("valid", "invalid"), row(null, null));
88+
}
89+
90+
private String getHdfsPath(String firstSubPath, String... more)
91+
{
92+
return Path.of(firstSubPath, more).toString();
93+
}
94+
95+
private void copyToHdfs(Path file, String hdfsPath)
96+
throws IOException
97+
{
98+
try (InputStream input = newInputStream(file)) {
99+
hdfsClient.saveFile(hdfsPath, input);
100+
}
101+
}
102+
}

0 commit comments

Comments
 (0)