From 06ac65e3b5d05f356396387ed9e72ad5231c65bb Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 7 Sep 2017 17:45:54 -0400 Subject: [PATCH] Add new scalar statistics properties to 'QueryJob' (#3800) * Add 'QueryJob.total_bytes_processed' property. * Add 'QueryJob.total_bytes_billed' property. * Add 'QueryJob.billing_tier' property. * Add 'QueryJob.cache_hit' property. * Add 'QueryJob.num_dml_affected_rows' property. * Add 'QueryJob.statement_type' property. --- bigquery/google/cloud/bigquery/job.py | 87 ++++++++++++++++++++++++++ bigquery/tests/unit/test_job.py | 89 +++++++++++++++++++++++++++ 2 files changed, 176 insertions(+) diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index 9085ca650612..bbe9b78ddade 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -1299,6 +1299,93 @@ def query_plan(self): plan_entries = self._job_statistics().get('queryPlan', ()) return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries] + @property + def total_bytes_processed(self): + """Return total bytes processed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.totalBytesProcessed + + :rtype: int or None + :returns: total bytes processed by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get('totalBytesProcessed') + if result is not None: + result = int(result) + return result + + @property + def total_bytes_billed(self): + """Return total bytes billed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.totalBytesBilled + + :rtype: int or None + :returns: total bytes billed for the job, or None if job is not + yet complete. 
+ """ + result = self._job_statistics().get('totalBytesBilled') + if result is not None: + result = int(result) + return result + + @property + def billing_tier(self): + """Return billing tier from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.billingTier + + :rtype: int or None + :returns: billing tier used by the job, or None if job is not + yet complete. + """ + return self._job_statistics().get('billingTier') + + @property + def cache_hit(self): + """Return cache hit flag from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.cacheHit + + :rtype: bool or None + :returns: whether the query results were returned from cache, or None + if job is not yet complete. + """ + return self._job_statistics().get('cacheHit') + + @property + def num_dml_affected_rows(self): + """Return number of DML-affected rows from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.numDmlAffectedRows + + :rtype: int or None + :returns: number of DML rows affected by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get('numDmlAffectedRows') + if result is not None: + result = int(result) + return result + + @property + def statement_type(self): + """Return statement type from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.statementType + + :rtype: str or None + :returns: type of statement used by the job, or None if job is not + yet complete. + """ + return self._job_statistics().get('statementType') + def query_results(self): """Construct a QueryResults instance, bound to this job. 
diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index 5daecdef0b94..677fa259f1c4 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -1702,6 +1702,95 @@ def test_query_plan(self): self.assertEqual(f_step.kind, e_step['kind']) self.assertEqual(f_step.substeps, e_step['substeps']) + def test_total_bytes_processed(self): + total_bytes = 1234 + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.total_bytes_processed) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.total_bytes_processed) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.total_bytes_processed) + + query_stats['totalBytesProcessed'] = str(total_bytes) + self.assertEqual(job.total_bytes_processed, total_bytes) + + def test_total_bytes_billed(self): + total_bytes = 1234 + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.total_bytes_billed) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.total_bytes_billed) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.total_bytes_billed) + + query_stats['totalBytesBilled'] = str(total_bytes) + self.assertEqual(job.total_bytes_billed, total_bytes) + + def test_billing_tier(self): + billing_tier = 1 + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.billing_tier) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.billing_tier) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.billing_tier) + + query_stats['billingTier'] = billing_tier + self.assertEqual(job.billing_tier, billing_tier) + + def test_cache_hit(self): + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.cache_hit) + + statistics = job._properties['statistics'] = {} + 
self.assertIsNone(job.cache_hit) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.cache_hit) + + query_stats['cacheHit'] = True + self.assertTrue(job.cache_hit) + + def test_num_dml_affected_rows(self): + num_rows = 1234 + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.num_dml_affected_rows) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.num_dml_affected_rows) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.num_dml_affected_rows) + + query_stats['numDmlAffectedRows'] = str(num_rows) + self.assertEqual(job.num_dml_affected_rows, num_rows) + + def test_statement_type(self): + statement_type = 'SELECT' + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.statement_type) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.statement_type) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.statement_type) + + query_stats['statementType'] = statement_type + self.assertEqual(job.statement_type, statement_type) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults