Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BigQuery: Include SQL query and job ID in job errors #8748

Merged
merged 3 commits into from
Jul 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 41 additions & 6 deletions bigquery/google/cloud/bigquery/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -2846,6 +2846,36 @@ def _blocking_poll(self, timeout=None):
self._done_timeout = timeout
super(QueryJob, self)._blocking_poll(timeout=timeout)

@staticmethod
def _format_for_exception(query, job_id):
    """Format a query for the output in exception message.

    The result is meant to be appended to an error message: it carries the
    job ID, a centered header, and the SQL text with 1-based line numbers,
    framed above and below by a column ruler.

    Args:
        query (str): The SQL query to format.
        job_id (str): The ID of the job that ran the query.

    Returns:
        str: A formatted query text.
    """
    template = "\n\n(job ID: {job_id})\n\n{header}\n\n{ruler}\n{body}\n{ruler}"

    lines = query.splitlines()
    # An empty query has no lines; fall back to a zero width instead of
    # letting max() raise ValueError and hide the original job error.
    max_line_len = max([len(line) for line in lines] or [0])

    header = "-----Query Job SQL Follows-----"
    # The extra 5 columns account for the line-number gutter in the body.
    header = "{:^{total_width}}".format(header, total_width=max_line_len + 5)

    # Print out a "ruler" above and below the SQL so we can judge columns.
    # Left pad for the line numbers (4 digits plus ":"), then one
    # 10-column segment for every full 10 characters of the longest line.
    ruler = "    |" + "    .    |" * (max_line_len // 10)

    # Put line numbers next to the SQL.
    body = "\n".join(
        "{:4}:{}".format(n, line) for n, line in enumerate(lines, start=1)
    )

    return template.format(job_id=job_id, header=header, ruler=ruler, body=body)

def result(self, timeout=None, page_size=None, retry=DEFAULT_RETRY):
"""Start the job and wait for it to complete and get the result.
Expand Down Expand Up @@ -2874,12 +2904,17 @@ def result(self, timeout=None, page_size=None, retry=DEFAULT_RETRY):
concurrent.futures.TimeoutError:
If the job did not complete in the given timeout.
"""
super(QueryJob, self).result(timeout=timeout)
# Return an iterator instead of returning the job.
if not self._query_results:
self._query_results = self._client._get_query_results(
self.job_id, retry, project=self.project, location=self.location
)
try:
super(QueryJob, self).result(timeout=timeout)

# Return an iterator instead of returning the job.
if not self._query_results:
self._query_results = self._client._get_query_results(
self.job_id, retry, project=self.project, location=self.location
)
except exceptions.GoogleCloudError as exc:
exc.message += self._format_for_exception(self.query, self.job_id)
raise

# If the query job is complete but there are no query results, this was a
# special job, such as a DDL query. Return an empty result set to
Expand Down
19 changes: 18 additions & 1 deletion bigquery/tests/unit/test_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import copy
import json
import textwrap
import unittest

import mock
Expand Down Expand Up @@ -4256,8 +4257,15 @@ def test_result_w_page_size(self):
def test_result_error(self):
from google.cloud import exceptions

query = textwrap.dedent(
"""
SELECT foo, bar
FROM table_baz
WHERE foo == bar"""
)

client = _make_client(project=self.PROJECT)
job = self._make_one(self.JOB_ID, self.QUERY, client)
job = self._make_one(self.JOB_ID, query, client)
error_result = {
"debugInfo": "DEBUG",
"location": "LOCATION",
Expand All @@ -4277,6 +4285,15 @@ def test_result_error(self):
self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError)
self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST)

full_text = str(exc_info.exception)

assert job.job_id in full_text
assert "Query Job SQL Follows" in full_text

for i, line in enumerate(query.splitlines(), start=1):
expected_line = "{}:{}".format(i, line)
assert expected_line in full_text

def test_begin_w_bound_client(self):
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.job import QueryJobConfig
Expand Down