-
Notifications
You must be signed in to change notification settings - Fork 320
BigQuery: fix an issue with option propagation and refactor to future-proof #540
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1075,7 +1075,7 @@ public long getEstimatedSizeBytes(PipelineOptions options) throws Exception { | |
public BoundedReader<TableRow> createReader(PipelineOptions options) throws IOException { | ||
BigQueryOptions bqOptions = options.as(BigQueryOptions.class); | ||
return new BigQueryReader(this, bqServices.getReaderFromQuery( | ||
bqOptions, query.get(), executingProject.get(), flattenResults, useLegacySql)); | ||
bqOptions, createBasicQueryConfig(), executingProject.get())); | ||
} | ||
|
||
@Override | ||
|
@@ -1152,11 +1152,12 @@ private void executeQuery( | |
.setProjectId(executingProject) | ||
.setJobId(jobId); | ||
|
||
// When changing options here, consider whether to change the defaults from | ||
// #createBasicQueryConfig instead. | ||
JobConfigurationQuery queryConfig = createBasicQueryConfig() | ||
.setAllowLargeResults(true) | ||
.setCreateDisposition("CREATE_IF_NEEDED") | ||
.setDestinationTable(destinationTable) | ||
.setPriority("BATCH") | ||
.setWriteDisposition("WRITE_EMPTY"); | ||
|
||
jobService.startQueryJob(jobRef, queryConfig); | ||
|
@@ -1167,9 +1168,12 @@ private void executeQuery( | |
} | ||
|
||
private JobConfigurationQuery createBasicQueryConfig() { | ||
// Due to deprecated functionality, if this function is updated | ||
// then the similar code in BigQueryTableRowIterator#fromQuery should be updated. | ||
return new JobConfigurationQuery() | ||
.setQuery(query.get()) | ||
.setFlattenResults(flattenResults) | ||
.setPriority("BATCH") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. revert BATCH There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
.setQuery(query.get()) | ||
.setUseLegacySql(useLegacySql); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,11 +46,9 @@ | |
import com.google.common.base.MoreObjects; | ||
import com.google.common.collect.ImmutableList; | ||
import com.google.common.util.concurrent.Uninterruptibles; | ||
|
||
import org.joda.time.Duration; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import java.io.IOException; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
|
@@ -61,7 +59,6 @@ | |
import java.util.Objects; | ||
import java.util.Random; | ||
import java.util.concurrent.TimeUnit; | ||
|
||
import javax.annotation.Nullable; | ||
|
||
/** | ||
|
@@ -73,6 +70,7 @@ public class BigQueryTableRowIterator implements AutoCloseable { | |
@Nullable private TableReference ref; | ||
@Nullable private final String projectId; | ||
@Nullable private TableSchema schema; | ||
@Nullable private final JobConfigurationQuery queryConfig; | ||
private final Bigquery client; | ||
private String pageToken; | ||
private Iterator<TableRow> iteratorOverCurrentBatch; | ||
|
@@ -89,25 +87,18 @@ public class BigQueryTableRowIterator implements AutoCloseable { | |
// following interval to check the status of query execution job | ||
private static final Duration QUERY_COMPLETION_POLL_TIME = Duration.standardSeconds(1); | ||
|
||
private final String query; | ||
// Whether to flatten query results. | ||
private final boolean flattenResults; | ||
// Whether to use the BigQuery legacy SQL dialect.. | ||
private final boolean useLegacySql; | ||
// Temporary dataset used to store query results. | ||
private String temporaryDatasetId = null; | ||
// Temporary table used to store query results. | ||
private String temporaryTableId = null; | ||
|
||
private BigQueryTableRowIterator( | ||
@Nullable TableReference ref, @Nullable String query, @Nullable String projectId, | ||
Bigquery client, boolean flattenResults, boolean useLegacySql) { | ||
@Nullable TableReference ref, @Nullable JobConfigurationQuery queryConfig, | ||
@Nullable String projectId, Bigquery client) { | ||
this.ref = ref; | ||
this.query = query; | ||
this.queryConfig = queryConfig; | ||
this.projectId = projectId; | ||
this.client = checkNotNull(client, "client"); | ||
this.flattenResults = flattenResults; | ||
this.useLegacySql = useLegacySql; | ||
} | ||
|
||
/** | ||
|
@@ -116,7 +107,7 @@ private BigQueryTableRowIterator( | |
public static BigQueryTableRowIterator fromTable(TableReference ref, Bigquery client) { | ||
checkNotNull(ref, "ref"); | ||
checkNotNull(client, "client"); | ||
return new BigQueryTableRowIterator(ref, null, ref.getProjectId(), client, true, true); | ||
return new BigQueryTableRowIterator(ref, null, ref.getProjectId(), client); | ||
} | ||
|
||
/** | ||
|
@@ -135,23 +126,39 @@ public static BigQueryTableRowIterator fromQuery( | |
* Constructs a {@code BigQueryTableRowIterator} that reads from the results of executing the | ||
* specified query in the specified project. | ||
*/ | ||
@Deprecated | ||
public static BigQueryTableRowIterator fromQuery( | ||
String query, String projectId, Bigquery client, @Nullable Boolean flattenResults, | ||
@Nullable Boolean useLegacySql) { | ||
checkNotNull(query, "query"); | ||
checkNotNull(projectId, "projectId"); | ||
checkNotNull(client, "client"); | ||
return new BigQueryTableRowIterator(null, query, projectId, client, | ||
MoreObjects.firstNonNull(flattenResults, Boolean.TRUE), | ||
MoreObjects.firstNonNull(useLegacySql, Boolean.TRUE)); | ||
JobConfigurationQuery queryConfig = new JobConfigurationQuery() | ||
.setFlattenResults(MoreObjects.firstNonNull(flattenResults, Boolean.TRUE)) | ||
.setPriority("BATCH") | ||
.setQuery(query) | ||
.setUseLegacySql(MoreObjects.firstNonNull(useLegacySql, Boolean.TRUE)); | ||
return new BigQueryTableRowIterator(null, queryConfig, projectId, client); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not refactoring in dataflow 1.x, this is a minimal fix only. |
||
} | ||
|
||
/** | ||
* Constructs a {@code BigQueryTableRowIterator} that reads from the results of executing the | ||
* specified query in the specified project. | ||
*/ | ||
public static BigQueryTableRowIterator fromQuery( | ||
JobConfigurationQuery queryConfig, String projectId, Bigquery client) { | ||
checkNotNull(queryConfig, "queryConfig"); | ||
checkNotNull(projectId, "projectId"); | ||
checkNotNull(client, "client"); | ||
return new BigQueryTableRowIterator(null, queryConfig, projectId, client); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not refactoring in dataflow 1.x, this is a minimal fix only. |
||
} | ||
|
||
/** | ||
* Opens the table for read. | ||
* @throws IOException on failure | ||
*/ | ||
public void open() throws IOException, InterruptedException { | ||
if (query != null) { | ||
if (queryConfig != null) { | ||
ref = executeQueryAndWaitForCompletion(); | ||
} | ||
// Get table schema. | ||
|
@@ -401,15 +408,17 @@ private void deleteDataset(String datasetId) throws IOException, InterruptedExce | |
*/ | ||
private TableReference executeQueryAndWaitForCompletion() | ||
throws IOException, InterruptedException { | ||
checkState(projectId != null, "Cannot dryRun a query in unknown (null) project"); | ||
checkState(queryConfig != null, "Cannot dryRun a null query"); | ||
// Dry run query to get source table location | ||
Job dryRunJob = new Job() | ||
.setConfiguration(new JobConfiguration() | ||
.setQuery(new JobConfigurationQuery() | ||
.setQuery(query)) | ||
.setQuery(queryConfig) | ||
.setDryRun(true)); | ||
JobStatistics jobStats = executeWithBackOff( | ||
client.jobs().insert(projectId, dryRunJob), | ||
String.format("Error when trying to dry run query %s.", query)).getStatistics(); | ||
String.format("Error when trying to dry run query %s.", | ||
queryConfig.toPrettyString())).getStatistics(); | ||
|
||
// Let BigQuery to pick default location if the query does not read any tables. | ||
String location = null; | ||
|
@@ -428,30 +437,27 @@ private TableReference executeQueryAndWaitForCompletion() | |
createDataset(temporaryDatasetId, location); | ||
Job job = new Job(); | ||
JobConfiguration config = new JobConfiguration(); | ||
JobConfigurationQuery queryConfig = new JobConfigurationQuery(); | ||
config.setQuery(queryConfig); | ||
job.setConfiguration(config); | ||
queryConfig.setQuery(query); | ||
queryConfig.setAllowLargeResults(true); | ||
queryConfig.setFlattenResults(flattenResults); | ||
queryConfig.setUseLegacySql(useLegacySql); | ||
|
||
|
||
TableReference destinationTable = new TableReference(); | ||
destinationTable.setProjectId(projectId); | ||
destinationTable.setDatasetId(temporaryDatasetId); | ||
destinationTable.setTableId(temporaryTableId); | ||
queryConfig.setDestinationTable(destinationTable); | ||
queryConfig.setAllowLargeResults(Boolean.TRUE); | ||
|
||
Job queryJob = executeWithBackOff( | ||
client.jobs().insert(projectId, job), | ||
String.format("Error when trying to execute the job for query %s.", query)); | ||
String.format("Error when trying to execute the job for query %s.", | ||
queryConfig.toPrettyString())); | ||
JobReference jobId = queryJob.getJobReference(); | ||
|
||
while (true) { | ||
Job pollJob = executeWithBackOff( | ||
client.jobs().get(projectId, jobId.getJobId()), | ||
String.format("Error when trying to get status of the job for query %s.", query)); | ||
String.format("Error when trying to get status of the job for query %s.", | ||
queryConfig.toPrettyString())); | ||
JobStatus status = pollJob.getStatus(); | ||
if (status.getState().equals("DONE")) { | ||
// Job is DONE, but did not necessarily succeed. | ||
|
@@ -461,7 +467,9 @@ private TableReference executeQueryAndWaitForCompletion() | |
} else { | ||
// There will be no temporary table to delete, so null out the reference. | ||
temporaryTableId = null; | ||
throw new IOException("Executing query " + query + " failed: " + error.getMessage()); | ||
throw new IOException( | ||
String.format("Executing query %s failed: %s", | ||
queryConfig.toPrettyString(), error.getMessage())); | ||
} | ||
} | ||
Uninterruptibles.sleepUninterruptibly( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is that better to have a
public static createBasicQueryConfig(String query, boolean flattenResults, boolean useLegacySql)
and, use it in two places?