From 92ad0cb5be3fcf198d5abf9c979fbfaa95dceaea Mon Sep 17 00:00:00 2001
From: Peter Lamut
Date: Wed, 24 Jul 2019 18:34:34 +0200
Subject: [PATCH] Include SQL query and job ID in job errors

The code for query format in exception messages is a modified version of
the original proposal by @bencaine1 in the feature request:
https://github.com/googleapis/google-cloud-python/issues/5408#issue-327761423
---
 bigquery/google/cloud/bigquery/job.py | 55 ++++++++++++++++++++++++---
 bigquery/tests/unit/test_job.py       | 19 ++++++++-
 2 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py
index 5cd90ada9637b..f323dd8026858 100644
--- a/bigquery/google/cloud/bigquery/job.py
+++ b/bigquery/google/cloud/bigquery/job.py
@@ -2846,6 +2846,39 @@ def _blocking_poll(self, timeout=None):
         self._done_timeout = timeout
         super(QueryJob, self)._blocking_poll(timeout=timeout)
 
+    @staticmethod
+    def _format_for_exception(query, job_id):
+        """Format a query and its job ID for inclusion in an exception message.
+
+        Args:
+            query (str): The SQL query to format.
+            job_id (str): The ID of the job that ran the query.
+
+        Returns: (str)
+            The job ID followed by the line-numbered query text, framed by
+            column rulers.
+        """
+        template = "\n\n(job ID: {job_id})\n\n{header}\n\n{ruler}\n{body}\n{ruler}"
+
+        lines = query.splitlines()
+        # An empty query has no lines; ``max()`` would raise ``ValueError`` on an
+        # empty sequence and mask the error that is being reported.
+        max_line_len = max(len(line) for line in lines) if lines else 0
+
+        header = "-----Offending SQL Follows-----"
+        header = "{:^{total_width}}".format(header, total_width=max_line_len + 5)
+
+        # Print out a "ruler" above and below the SQL so we can judge columns.
+        # Left pad for the line numbers (4 digits plus ":").
+        ruler = "    |" + "    .    |" * (max_line_len // 10)
+
+        # Put line numbers next to the SQL.
+        body = "\n".join(
+            "{:4}:{}".format(n, line) for n, line in enumerate(lines, start=1)
+        )
+
+        return template.format(job_id=job_id, header=header, ruler=ruler, body=body)
+
     def result(self, timeout=None, page_size=None, retry=DEFAULT_RETRY):
         """Start the job and wait for it to complete and get the result.
 
@@ -2874,12 +2907,22 @@ def result(self, timeout=None, page_size=None, retry=DEFAULT_RETRY):
             concurrent.futures.TimeoutError:
                 If the job did not complete in the given timeout.
         """
-        super(QueryJob, self).result(timeout=timeout)
-        # Return an iterator instead of returning the job.
-        if not self._query_results:
-            self._query_results = self._client._get_query_results(
-                self.job_id, retry, project=self.project, location=self.location
-            )
+        try:
+            super(QueryJob, self).result(timeout=timeout)
+
+            # Return an iterator instead of returning the job.
+            if not self._query_results:
+                self._query_results = self._client._get_query_results(
+                    self.job_id, retry, project=self.project, location=self.location
+                )
+        except exceptions.GoogleCloudError as exc:
+            # Build the message from ``exc.message``: ``str(exc)`` already starts
+            # with the status code, which the new exception would then repeat.
+            new_msg = exc.message + self._format_for_exception(self.query, self.job_id)
+            # Carry over ``errors`` and ``response`` so callers can still inspect them.
+            new_exc = type(exc)(new_msg, errors=exc.errors, response=exc.response)
+            # ``six.raise_from`` raises by itself; no ``raise`` keyword is needed.
+            six.raise_from(new_exc, exc)
 
         # If the query job is complete but there are no query results, this was
         # special job, such as a DDL query. Return an empty result set to
diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py
index dcc90b2d96a88..6e1b99d9c18af 100644
--- a/bigquery/tests/unit/test_job.py
+++ b/bigquery/tests/unit/test_job.py
@@ -14,6 +14,7 @@
 
 import copy
 import json
+import textwrap
 import unittest
 
 import mock
@@ -4256,8 +4257,15 @@ def test_result_w_page_size(self):
     def test_result_error(self):
         from google.cloud import exceptions
 
+        query = textwrap.dedent(
+            """
+            SELECT foo, bar
+            FROM table_baz
+            WHERE foo == bar"""
+        )
+
         client = _make_client(project=self.PROJECT)
-        job = self._make_one(self.JOB_ID, self.QUERY, client)
+        job = self._make_one(self.JOB_ID, query, client)
         error_result = {
             "debugInfo": "DEBUG",
             "location": "LOCATION",
@@ -4277,6 +4285,15 @@ def test_result_error(self):
         self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError)
         self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST)
 
+        full_text = str(exc_info.exception)
+
+        assert job.job_id in full_text
+        assert "Offending SQL Follows" in full_text
+
+        for i, line in enumerate(query.splitlines(), start=1):
+            expected_line = "{}:{}".format(i, line)
+            assert expected_line in full_text
+
     def test_begin_w_bound_client(self):
         from google.cloud.bigquery.dataset import DatasetReference
         from google.cloud.bigquery.job import QueryJobConfig