From 0815bee4e6d0b67bcf55e977acc53e26eda9d86a Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Fri, 6 Jan 2017 16:20:49 -0500 Subject: [PATCH] [#3390] datastore dump format=json --- ckanext/datastore/controller.py | 5 ++- ckanext/datastore/writer.py | 58 +++++++++++++++++++++++++++++++-- doc/maintaining/datastore.rst | 8 +++-- 3 files changed, 64 insertions(+), 7 deletions(-) diff --git a/ckanext/datastore/controller.py b/ckanext/datastore/controller.py index 559a1d2dfb7..821c5a97ab0 100644 --- a/ckanext/datastore/controller.py +++ b/ckanext/datastore/controller.py @@ -18,12 +18,13 @@ from ckanext.datastore.writer import ( csv_writer, tsv_writer, + json_writer, ) int_validator = get_validator('int_validator') boolean_validator = get_validator('boolean_validator') -DUMP_FORMATS = 'csv', 'tsv' +DUMP_FORMATS = 'csv', 'tsv', 'json' PAGINATE_BY = 10000 @@ -45,6 +46,8 @@ def start_writer(columns): return csv_writer(response, columns, resource_id, bom) if fmt == 'tsv': return tsv_writer(response, columns, resource_id, bom) + if fmt == 'json': + return json_writer(response, columns, resource_id, bom) abort(400, _( u'format: must be one of %s') % u', '.join(DUMP_FORMATS)) diff --git a/ckanext/datastore/writer.py b/ckanext/datastore/writer.py index b767220ad5a..910337124ba 100644 --- a/ckanext/datastore/writer.py +++ b/ckanext/datastore/writer.py @@ -1,4 +1,6 @@ from contextlib import contextmanager +from email.utils import encode_rfc2231 +import json import unicodecsv @@ -24,7 +26,8 @@ def csv_writer(response, columns, name=None, bom=False): response.headers['Content-Type'] = 'text/csv; charset=utf-8' if name: response.headers['Content-disposition'] = ( - 'attachment; filename="{name}.csv"'.format(name=name)) + 'attachment; filename="{name}.csv"'.format( + name=encode_rfc2231(name))) wr = unicodecsv.writer(response, encoding='utf-8') if bom: response.write(UTF8_BOM) @@ -49,13 +52,62 @@ def tsv_writer(response, columns, name=None, bom=False): if hasattr(response, 'headers'): 
@contextmanager
def json_writer(response, columns, name=None, bom=False):
    u'''Context manager for writing UTF-8 JSON data to response

    The output is a single JSON object with one key, ``data``, holding
    an array with one object per row written (keyed by column name).

    :param response: file-like or response-like object for writing
        data and headers (response-like objects only)
    :param columns: list of column names
    :param name: file name (for headers, response-like objects only)
    :param bom: True to include a UTF-8 BOM at the start of the file

    >>> with json_writer(response, fields) as d:
    ...     d.writerow(row1)
    ...     d.writerow(row2)
    '''

    # Only response-like objects carry headers; plain files skip this.
    if hasattr(response, 'headers'):
        response.headers['Content-Type'] = (
            'application/json; charset=utf-8')
        if name:
            # Percent-encode the name so it is safe inside the quoted
            # filename parameter.
            response.headers['Content-disposition'] = (
                'attachment; filename="{name}.json"'.format(
                    name=encode_rfc2231(name)))
    if bom:
        # NOTE(review): RFC 8259 discourages a BOM on JSON text; it is
        # kept here because bom is part of the writer interface shared
        # with the CSV/TSV writers -- confirm whether callers need it.
        response.write(UTF8_BOM)
    response.write(b'{\n "data": [')
    yield JSONWriter(response, columns)
    # Reached only when the with-body finishes without raising; on an
    # error the array is left unterminated.
    response.write(b'\n]}\n')


class JSONWriter(object):
    u'''Write rows to response as comma-separated JSON objects

    Only the array elements are written; the caller is responsible for
    the surrounding brackets.
    '''

    def __init__(self, response, columns):
        self.response = response
        self.columns = columns
        # True until the first row is written: decides whether a comma
        # separator is needed before the next row.
        self.first = True

    def writerow(self, row):
        u'''Write one row as a compact JSON object keyed by column name

        :param row: sequence of values in the same order as columns
        '''
        if self.first:
            self.first = False
            prefix = b'\n '
        else:
            prefix = b',\n '
        record = json.dumps(
            dict(zip(self.columns, row)),
            ensure_ascii=False,
            separators=(',', ':'),
            sort_keys=True)
        # One write per row: separator and payload go out together.
        self.response.write(prefix + record.encode('utf-8'))
b/doc/maintaining/datastore.rst @@ -275,14 +275,16 @@ API reference .. _dump: -Download resource as CSV ------------------------- +Download resource +----------------- A DataStore resource can be downloaded in the `CSV`_ file format from ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}``. For an Excel-compatible CSV file use ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?bom=true``. -For tab-separated values use ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=tsv``. +Other formats are also supported. For tab-separated values use +``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=tsv`` and for JSON use +``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=json``. .. _CSV: https://en.wikipedia.org/wiki/Comma-separated_values