From 49817beb30a3cf244f3e8d7c7ed6e56b5ec69572 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Fri, 6 Jan 2017 12:39:23 -0500 Subject: [PATCH 1/9] [#3390] datastore: dump CSV for Excel with &bom=true --- ckanext/datastore/controller.py | 8 +++++++- ckanext/datastore/plugin.py | 2 +- doc/maintaining/datastore.rst | 2 ++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/ckanext/datastore/controller.py b/ckanext/datastore/controller.py index 97a53d98677..9e5bdec40f3 100644 --- a/ckanext/datastore/controller.py +++ b/ckanext/datastore/controller.py @@ -18,12 +18,15 @@ ) int_validator = get_validator('int_validator') +boolean_validator = get_validator('boolean_validator') + +UTF8_BOM = u'\uFEFF'.encode('utf-8') PAGINATE_BY = 10000 class DatastoreController(BaseController): - def dump(self, resource_id): + def dump_csv(self, resource_id): try: offset = int_validator(request.GET.get('offset', 0), {}) except Invalid as e: @@ -32,6 +35,7 @@ def dump(self, resource_id): limit = int_validator(request.GET.get('limit'), {}) except Invalid as e: abort(400, u'limit: ' + e.error) + bom = boolean_validator(request.GET.get('bom'), {}) wr = None while True: @@ -57,6 +61,8 @@ def dump(self, resource_id): wr = csv.writer(response, encoding='utf-8') header = [x['id'] for x in result['fields']] + if bom: + response.write(UTF8_BOM) wr.writerow(header) for record in result['records']: diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index 073ffcee214..4b725fed813 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -248,7 +248,7 @@ def get_auth_functions(self): def before_map(self, m): m.connect('/datastore/dump/{resource_id}', controller='ckanext.datastore.controller:DatastoreController', - action='dump') + action='dump_csv') return m def before_show(self, resource_dict): diff --git a/doc/maintaining/datastore.rst b/doc/maintaining/datastore.rst index 6928e683141..8878d41ef6c 100644 --- a/doc/maintaining/datastore.rst +++ b/doc/maintaining/datastore.rst @@ -280,6 +280,8 @@ Download resource as CSV A DataStore resource can be downloaded in the `CSV`_ file format from ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}``. +For an Excel-compatible CSV file use ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}&bom=true`` + .. 
_CSV: https://en.wikipedia.org/wiki/Comma-separated_values From 417f1ea363baa334b260a84c0ca12b3779f02cf8 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Fri, 6 Jan 2017 13:14:14 -0500 Subject: [PATCH 2/9] [#3390] datastore: dump TSV with format=tsv --- ckanext/datastore/controller.py | 36 ++++++++++++++++++++++++--------- ckanext/datastore/plugin.py | 2 +- doc/maintaining/datastore.rst | 4 +++- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/ckanext/datastore/controller.py b/ckanext/datastore/controller.py index 9e5bdec40f3..81d3759b025 100644 --- a/ckanext/datastore/controller.py +++ b/ckanext/datastore/controller.py @@ -21,12 +21,12 @@ boolean_validator = get_validator('boolean_validator') UTF8_BOM = u'\uFEFF'.encode('utf-8') - +DUMP_FORMATS = 'csv', 'tsv' PAGINATE_BY = 10000 class DatastoreController(BaseController): - def dump_csv(self, resource_id): + def dump(self, resource_id): try: offset = int_validator(request.GET.get('offset', 0), {}) except Invalid as e: @@ -36,6 +36,30 @@ def dump_csv(self, resource_id): except Invalid as e: abort(400, u'limit: ' + e.error) bom = boolean_validator(request.GET.get('bom'), {}) + fmt = request.GET.get('format', 'csv') + + def start_writer(): + if fmt == 'csv': + response.headers['Content-Type'] = 'text/csv; charset=utf-8' + response.headers['Content-disposition'] = ( + 'attachment; filename="{name}.csv"'.format( + name=resource_id)) + wr = csv.writer(response, encoding='utf-8') + elif fmt == 'tsv': + response.headers['Content-Type'] = ( + 'text/tab-separated-values; charset=utf-8') + response.headers['Content-disposition'] = ( + 'attachment; filename="{name}.tsv"'.format( + name=resource_id)) + wr = csv.writer( + response, encoding='utf-8', dialect=csv.excel_tab) + else: + abort(400, + _(u'format: must be one of %s') % u', '.join(DUMP_FORMATS)) + + if bom: + response.write(UTF8_BOM) + return wr wr = None while True: @@ -54,15 +78,9 @@ def dump_csv(self, resource_id): abort(404, _('DataStore resource not found')) if not wr: - response.headers['Content-Type'] = 'text/csv; charset=utf-8' - response.headers['Content-disposition'] = ( - 'attachment; filename="{name}.csv"'.format( - name=resource_id)) - wr = csv.writer(response, encoding='utf-8') + wr = start_writer() header = [x['id'] for x in result['fields']] - if bom: - response.write(UTF8_BOM) wr.writerow(header) for record in result['records']: diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index 4b725fed813..073ffcee214 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -248,7 +248,7 @@ def get_auth_functions(self): def before_map(self, m): m.connect('/datastore/dump/{resource_id}', controller='ckanext.datastore.controller:DatastoreController', - action='dump_csv') + action='dump') return m def before_show(self, resource_dict): diff --git a/doc/maintaining/datastore.rst b/doc/maintaining/datastore.rst index 8878d41ef6c..a19294f6c9b 100644 --- a/doc/maintaining/datastore.rst +++ b/doc/maintaining/datastore.rst @@ -280,7 +280,9 @@ Download resource as CSV A DataStore resource can be downloaded in the `CSV`_ file format from ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}``. -For an Excel-compatible CSV file use ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}&bom=true`` +For an Excel-compatible CSV file use ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?bom=true``. + +For tab-separated values use ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=tsv``. .. 
_CSV: https://en.wikipedia.org/wiki/Comma-separated_values From da3b9571fe0ed4536a9ea34398c15dfc7c4faf3d Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Fri, 6 Jan 2017 15:35:54 -0500 Subject: [PATCH 3/9] [#3390] factor out csv/tsv writing into context managers --- ckanext/datastore/controller.py | 67 +++++++++++++-------------------- ckanext/datastore/writer.py | 61 ++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 40 deletions(-) create mode 100644 ckanext/datastore/writer.py diff --git a/ckanext/datastore/controller.py b/ckanext/datastore/controller.py index 81d3759b025..559a1d2dfb7 100644 --- a/ckanext/datastore/controller.py +++ b/ckanext/datastore/controller.py @@ -1,7 +1,6 @@ # encoding: utf-8 import StringIO -import unicodecsv as csv import pylons @@ -16,11 +15,14 @@ BaseController, abort, ) +from ckanext.datastore.writer import ( + csv_writer, + tsv_writer, +) int_validator = get_validator('int_validator') boolean_validator = get_validator('boolean_validator') -UTF8_BOM = u'\uFEFF'.encode('utf-8') DUMP_FORMATS = 'csv', 'tsv' PAGINATE_BY = 10000 @@ -38,36 +40,17 @@ def dump(self, resource_id): bom = boolean_validator(request.GET.get('bom'), {}) fmt = request.GET.get('format', 'csv') - def start_writer(): + def start_writer(columns): if fmt == 'csv': - response.headers['Content-Type'] = 'text/csv; charset=utf-8' - response.headers['Content-disposition'] = ( - 'attachment; filename="{name}.csv"'.format( - name=resource_id)) - wr = csv.writer(response, encoding='utf-8') - elif fmt == 'tsv': - response.headers['Content-Type'] = ( - 'text/tab-separated-values; charset=utf-8') - response.headers['Content-disposition'] = ( - 'attachment; filename="{name}.tsv"'.format( - name=resource_id)) - wr = csv.writer( - response, encoding='utf-8', dialect=csv.excel_tab) - else: - abort(400, - _(u'format: must be one of %s') % u', '.join(DUMP_FORMATS)) - - if bom: - response.write(UTF8_BOM) - return wr - - wr = None - while True: - if limit is not None and limit <= 0: - break + return csv_writer(response, columns, resource_id, bom) + if fmt == 'tsv': + return tsv_writer(response, columns, resource_id, bom) + abort(400, _( + u'format: must be one of %s') % u', '.join(DUMP_FORMATS)) + def result_page(offset, limit): try: - result = get_action('datastore_search')(None, { + return get_action('datastore_search')(None, { 'resource_id': resource_id, 'limit': PAGINATE_BY if limit is None @@ -77,17 +60,21 @@ def start_writer(): except ObjectNotFound: abort(404, _('DataStore resource not found')) - if not wr: - wr = start_writer() + result = result_page(offset, limit) + columns = [x['id'] for x in result['fields']] + + with start_writer(columns) as wr: + while True: + if limit is not None and limit <= 0: + break - header = [x['id'] for x in result['fields']] - wr.writerow(header) + for record in result['records']: + wr.writerow([record[column] for column in columns]) - for record in result['records']: - wr.writerow([record[column] for column in header]) + if len(result['records']) < PAGINATE_BY: + break + offset += PAGINATE_BY + if limit is not None: + limit -= PAGINATE_BY - if len(result['records']) < PAGINATE_BY: - break - offset += PAGINATE_BY - if limit is not None: - limit -= PAGINATE_BY + result = result_page(offset, limit) diff --git a/ckanext/datastore/writer.py b/ckanext/datastore/writer.py new file mode 100644 index 00000000000..b767220ad5a --- /dev/null +++ b/ckanext/datastore/writer.py @@ -0,0 +1,61 @@ +from contextlib import contextmanager + +import unicodecsv + +UTF8_BOM = 
u'\uFEFF'.encode('utf-8') + + +@contextmanager +def csv_writer(response, columns, name=None, bom=False): + u'''Context manager for writing UTF-8 CSV data to response + + :param response: file-like or response-like object for writing + data and headers (response-like objects only) + :param columns: list of column names + :param name: file name (for headers, response-like objects only) + :param bom: True to include a UTF-8 BOM at the start of the file + + >>> with csv_writer(response, fields) as d: + >>> d.writerow(row1) + >>> d.writerow(row2) + ''' + + if hasattr(response, 'headers'): + response.headers['Content-Type'] = 'text/csv; charset=utf-8' + if name: + response.headers['Content-disposition'] = ( + 'attachment; filename="{name}.csv"'.format(name=name)) + wr = unicodecsv.writer(response, encoding='utf-8') + if bom: + response.write(UTF8_BOM) + wr.writerow(columns) + yield wr + + +@contextmanager +def tsv_writer(response, columns, name=None, bom=False): + u'''Context manager for writing UTF-8 TSV data to response + + :param response: file-like or response-like object for writing + data and headers (response-like objects only) + :param columns: list of column names + :param name: file name (for headers, response-like objects only) + :param bom: True to include a UTF-8 BOM at the start of the file + + >>> with tsv_writer(response, fields) as d: + >>> d.writerow(row1) + >>> d.writerow(row2) + ''' + + if hasattr(response, 'headers'): + response.headers['Content-Type'] = ( + 'text/csv;tab-separated-values charset=utf-8') + if name: + response.headers['Content-disposition'] = ( + 'attachment; filename="{name}.tsv"'.format(name=name)) + wr = unicodecsv.writer( + response, encoding='utf-8', dialect=unicodecsv.excel_tab) + if bom: + response.write(UTF8_BOM) + wr.writerow(columns) + yield wr From 0815bee4e6d0b67bcf55e977acc53e26eda9d86a Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Fri, 6 Jan 2017 16:20:49 -0500 Subject: [PATCH 4/9] [#3390] datastore dump format=json --- ckanext/datastore/controller.py | 5 ++- ckanext/datastore/writer.py | 58 +++++++++++++++++++++++++++++++-- doc/maintaining/datastore.rst | 8 +++-- 3 files changed, 64 insertions(+), 7 deletions(-) diff --git a/ckanext/datastore/controller.py b/ckanext/datastore/controller.py index 559a1d2dfb7..821c5a97ab0 100644 --- a/ckanext/datastore/controller.py +++ b/ckanext/datastore/controller.py @@ -18,12 +18,13 @@ from ckanext.datastore.writer import ( csv_writer, tsv_writer, + json_writer, ) int_validator = get_validator('int_validator') boolean_validator = get_validator('boolean_validator') -DUMP_FORMATS = 'csv', 'tsv' +DUMP_FORMATS = 'csv', 'tsv', 'json' PAGINATE_BY = 10000 @@ -45,6 +46,8 @@ def start_writer(columns): return csv_writer(response, columns, resource_id, bom) if fmt == 'tsv': return tsv_writer(response, columns, resource_id, bom) + if fmt == 'json': + return json_writer(response, columns, resource_id, bom) abort(400, _( u'format: must be one of %s') % u', '.join(DUMP_FORMATS)) diff --git a/ckanext/datastore/writer.py b/ckanext/datastore/writer.py index b767220ad5a..910337124ba 100644 --- a/ckanext/datastore/writer.py +++ b/ckanext/datastore/writer.py @@ -1,4 +1,6 @@ from contextlib import contextmanager +from email.utils import encode_rfc2231 +import json import unicodecsv @@ -24,7 +26,8 @@ def csv_writer(response, columns, name=None, bom=False): response.headers['Content-Type'] = 'text/csv; charset=utf-8' if name: response.headers['Content-disposition'] = ( - 'attachment; filename="{name}.csv"'.format(name=name)) + 
'attachment; filename="{name}.csv"'.format( + name=encode_rfc2231(name))) wr = unicodecsv.writer(response, encoding='utf-8') if bom: response.write(UTF8_BOM) @@ -49,13 +52,62 @@ def tsv_writer(response, columns, name=None, bom=False): if hasattr(response, 'headers'): response.headers['Content-Type'] = ( - 'text/csv;tab-separated-values charset=utf-8') + 'text/tab-separated-values; charset=utf-8') if name: response.headers['Content-disposition'] = ( - 'attachment; filename="{name}.tsv"'.format(name=name)) + 'attachment; filename="{name}.tsv"'.format( + name=encode_rfc2231(name))) wr = unicodecsv.writer( response, encoding='utf-8', dialect=unicodecsv.excel_tab) if bom: response.write(UTF8_BOM) wr.writerow(columns) yield wr + + +@contextmanager +def json_writer(response, columns, name=None, bom=False): + u'''Context manager for writing UTF-8 JSON data to response + + :param response: file-like or response-like object for writing + data and headers (response-like objects only) + :param columns: list of column names + :param name: file name (for headers, response-like objects only) + :param bom: True to include a UTF-8 BOM at the start of the file + + >>> with json_writer(response, fields) as d: + >>> d.writerow(row1) + >>> d.writerow(row2) + ''' + + if hasattr(response, 'headers'): + response.headers['Content-Type'] = ( + 'application/json; charset=utf-8') + if name: + response.headers['Content-disposition'] = ( + 'attachment; filename="{name}.json"'.format( + name=encode_rfc2231(name))) + if bom: + response.write(UTF8_BOM) + response.write(b'{\n "data": [') + yield JSONWriter(response, columns) + response.write(b'\n]}\n') + + +class JSONWriter(object): + def __init__(self, response, columns): + self.response = response + self.columns = columns + self.first = True + + def writerow(self, row): + if self.first: + self.first = False + self.response.write(b'\n ') + else: + self.response.write(b',\n ') + self.response.write(json.dumps( + {k: v for (k, v) in zip(self.columns, row)}, + ensure_ascii=False, + separators=(',', ':'), + sort_keys=True).encode('utf-8')) diff --git a/doc/maintaining/datastore.rst b/doc/maintaining/datastore.rst index a19294f6c9b..11689ad4b82 100644 --- a/doc/maintaining/datastore.rst +++ b/doc/maintaining/datastore.rst @@ -275,14 +275,16 @@ API reference .. _dump: -Download resource as CSV ------------------------- +Download resource +----------------- A DataStore resource can be downloaded in the `CSV`_ file format from ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}``. For an Excel-compatible CSV file use ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?bom=true``. -For tab-separated values use ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=tsv``. +Other formats are also supported. For tab-separated values use +``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=tsv`` and for JSON use +``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=json``. .. 
_CSV: https://en.wikipedia.org/wiki/Comma-separated_values From 45acad3e1a42476d4bab54e4a268aa6d50f30696 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Fri, 6 Jan 2017 16:31:14 -0500 Subject: [PATCH 5/9] [#3390] file encoding, unprefixed strings --- ckanext/datastore/writer.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/ckanext/datastore/writer.py b/ckanext/datastore/writer.py index 910337124ba..9c9f6d39430 100644 --- a/ckanext/datastore/writer.py +++ b/ckanext/datastore/writer.py @@ -1,10 +1,12 @@ +# encoding: utf-8 + from contextlib import contextmanager from email.utils import encode_rfc2231 import json import unicodecsv -UTF8_BOM = u'\uFEFF'.encode('utf-8') +UTF8_BOM = u'\uFEFF'.encode(u'utf-8') @contextmanager @@ -22,13 +24,13 @@ def csv_writer(response, columns, name=None, bom=False): >>> d.writerow(row2) ''' - if hasattr(response, 'headers'): - response.headers['Content-Type'] = 'text/csv; charset=utf-8' + if hasattr(response, u'headers'): + response.headers['Content-Type'] = b'text/csv; charset=utf-8' if name: response.headers['Content-disposition'] = ( - 'attachment; filename="{name}.csv"'.format( + b'attachment; filename="{name}.csv"'.format( name=encode_rfc2231(name))) - wr = unicodecsv.writer(response, encoding='utf-8') + wr = unicodecsv.writer(response, encoding=u'utf-8') if bom: response.write(UTF8_BOM) wr.writerow(columns) @@ -50,15 +52,15 @@ def tsv_writer(response, columns, name=None, bom=False): >>> d.writerow(row2) ''' - if hasattr(response, 'headers'): + if hasattr(response, u'headers'): response.headers['Content-Type'] = ( - 'text/tab-separated-values; charset=utf-8') + b'text/tab-separated-values; charset=utf-8') if name: response.headers['Content-disposition'] = ( - 'attachment; filename="{name}.tsv"'.format( + b'attachment; filename="{name}.tsv"'.format( name=encode_rfc2231(name))) wr = unicodecsv.writer( - response, encoding='utf-8', dialect=unicodecsv.excel_tab) + response, encoding=u'utf-8', dialect=unicodecsv.excel_tab) if bom: response.write(UTF8_BOM) wr.writerow(columns) @@ -80,12 +82,12 @@ def json_writer(response, columns, name=None, bom=False): >>> d.writerow(row2) ''' - if hasattr(response, 'headers'): + if hasattr(response, u'headers'): response.headers['Content-Type'] = ( - 'application/json; charset=utf-8') + b'application/json; charset=utf-8') if name: response.headers['Content-disposition'] = ( - 'attachment; filename="{name}.json"'.format( + b'attachment; filename="{name}.json"'.format( name=encode_rfc2231(name))) if bom: response.write(UTF8_BOM) @@ -109,5 +111,5 @@ def writerow(self, row): self.response.write(json.dumps( {k: v for (k, v) in zip(self.columns, row)}, ensure_ascii=False, - separators=(',', ':'), - sort_keys=True).encode('utf-8')) + separators=(u',', u':'), + sort_keys=True).encode(u'utf-8')) From e5605d95096c1f1cee4ec588ba8bebc5515899bf Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Fri, 6 Jan 2017 17:35:43 -0500 Subject: [PATCH 6/9] [#3390] datastore dump format=xml --- ckanext/datastore/controller.py | 5 +++- ckanext/datastore/writer.py | 52 +++++++++++++++++++++++++++++++++ doc/maintaining/datastore.rst | 7 +++-- 3 files changed, 60 insertions(+), 4 deletions(-) diff --git a/ckanext/datastore/controller.py b/ckanext/datastore/controller.py index 821c5a97ab0..e13eacab3b9 100644 --- a/ckanext/datastore/controller.py +++ b/ckanext/datastore/controller.py @@ -19,12 +19,13 @@ csv_writer, tsv_writer, json_writer, + xml_writer, ) int_validator = get_validator('int_validator') 
boolean_validator = get_validator('boolean_validator') -DUMP_FORMATS = 'csv', 'tsv', 'json' +DUMP_FORMATS = 'csv', 'tsv', 'json', 'xml' PAGINATE_BY = 10000 @@ -48,6 +49,8 @@ def start_writer(columns): return tsv_writer(response, columns, resource_id, bom) if fmt == 'json': return json_writer(response, columns, resource_id, bom) + if fmt == 'xml': + return xml_writer(response, columns, resource_id, bom) abort(400, _( u'format: must be one of %s') % u', '.join(DUMP_FORMATS)) diff --git a/ckanext/datastore/writer.py b/ckanext/datastore/writer.py index 9c9f6d39430..9f6c6cd9f3d 100644 --- a/ckanext/datastore/writer.py +++ b/ckanext/datastore/writer.py @@ -3,6 +3,7 @@ from contextlib import contextmanager from email.utils import encode_rfc2231 import json +from xml.etree.cElementTree import Element, SubElement, ElementTree import unicodecsv @@ -113,3 +114,54 @@ def writerow(self, row): ensure_ascii=False, separators=(u',', u':'), sort_keys=True).encode(u'utf-8')) + + +@contextmanager +def xml_writer(response, columns, name=None, bom=False): + u'''Context manager for writing UTF-8 XML data to response + + :param response: file-like or response-like object for writing + data and headers (response-like objects only) + :param columns: list of column names + :param name: file name (for headers, response-like objects only) + :param bom: True to include a UTF-8 BOM at the start of the file + + >>> with xml_writer(response, fields) as d: + >>> d.writerow(row1) + >>> d.writerow(row2) + ''' + + if hasattr(response, u'headers'): + response.headers['Content-Type'] = ( + b'text/xml; charset=utf-8') + if name: + response.headers['Content-disposition'] = ( + b'attachment; filename="{name}.xml"'.format( + name=encode_rfc2231(name))) + if bom: + response.write(UTF8_BOM) + response.write(b'\n') + yield XMLWriter(response, columns) + response.write(b'\n') + + +class XMLWriter(object): + def __init__(self, response, columns): + self.response = response + self.id_col = columns[0] == u'_id' + if self.id_col: + columns = columns[1:] + self.columns = columns + + def writerow(self, row): + root = Element(u'row') + if self.id_col: + root.attrib[u'_id'] = unicode(row[0]) + row = row[1:] + for k, v in zip(self.columns, row): + if v is None: + SubElement(root, k).attrib[u'xsi:nil'] = u'true' + continue + SubElement(root, k).text = unicode(v) + ElementTree(root).write(self.response, encoding=u'utf-8') + self.response.write(b'\n') diff --git a/doc/maintaining/datastore.rst b/doc/maintaining/datastore.rst index 11689ad4b82..b1b1790d314 100644 --- a/doc/maintaining/datastore.rst +++ b/doc/maintaining/datastore.rst @@ -282,9 +282,10 @@ A DataStore resource can be downloaded in the `CSV`_ file format from ``{CKAN-UR For an Excel-compatible CSV file use ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?bom=true``. -Other formats are also supported. For tab-separated values use -``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=tsv`` and for JSON use -``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=json``. +Other formats supported include tab-separated values (``?format=tsv``), +JSON (``?format=json``) and XML (``?format=xml``). E.g. to download an Excel-compatible +tab-separated file use +``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=tsv&bom=true``. .. 
_CSV: https://en.wikipedia.org/wiki/Comma-separated_values From 87eea9b841b136f59a862138f28a2a4dc34c9fed Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Fri, 6 Jan 2017 17:41:46 -0500 Subject: [PATCH 7/9] [#3390] docstrings --- ckanext/datastore/writer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ckanext/datastore/writer.py b/ckanext/datastore/writer.py index 9f6c6cd9f3d..fc527f00400 100644 --- a/ckanext/datastore/writer.py +++ b/ckanext/datastore/writer.py @@ -20,7 +20,7 @@ def csv_writer(response, columns, name=None, bom=False): :param name: file name (for headers, response-like objects only) :param bom: True to include a UTF-8 BOM at the start of the file - >>> with csv_writer(response, fields) as d: + >>> with csv_writer(response, columns) as d: >>> d.writerow(row1) >>> d.writerow(row2) ''' @@ -48,7 +48,7 @@ def tsv_writer(response, columns, name=None, bom=False): :param name: file name (for headers, response-like objects only) :param bom: True to include a UTF-8 BOM at the start of the file - >>> with tsv_writer(response, fields) as d: + >>> with tsv_writer(response, columns) as d: >>> d.writerow(row1) >>> d.writerow(row2) ''' @@ -78,7 +78,7 @@ def json_writer(response, columns, name=None, bom=False): :param name: file name (for headers, response-like objects only) :param bom: True to include a UTF-8 BOM at the start of the file - >>> with json_writer(response, fields) as d: + >>> with json_writer(response, columns) as d: >>> d.writerow(row1) >>> d.writerow(row2) ''' @@ -126,7 +126,7 @@ def xml_writer(response, columns, name=None, bom=False): :param name: file name (for headers, response-like objects only) :param bom: True to include a UTF-8 BOM at the start of the file - >>> with xml_writer(response, fields) as d: + >>> with xml_writer(response, columns) as d: >>> d.writerow(row1) >>> d.writerow(row2) ''' From a6b595165df398d507bce7fc0a767ba0275103a9 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Fri, 6 Jan 2017 17:42:54 -0500 Subject: [PATCH 8/9] [#3390] datastore dump: more compact json export --- ckanext/datastore/writer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ckanext/datastore/writer.py b/ckanext/datastore/writer.py index fc527f00400..0860bf36ac3 100644 --- a/ckanext/datastore/writer.py +++ b/ckanext/datastore/writer.py @@ -92,7 +92,9 @@ def json_writer(response, columns, name=None, bom=False): name=encode_rfc2231(name))) if bom: response.write(UTF8_BOM) - response.write(b'{\n "data": [') + response.write( + b'{\n "columns": %s,\n "data": [' % json.dumps( + columns, ensure_ascii=False, separators=(u',', u':'))) yield JSONWriter(response, columns) response.write(b'\n]}\n') @@ -110,7 +112,7 @@ def writerow(self, row): else: self.response.write(b',\n ') self.response.write(json.dumps( - {k: v for (k, v) in zip(self.columns, row)}, + row, ensure_ascii=False, separators=(u',', u':'), sort_keys=True).encode(u'utf-8')) From 753359b8b854985e3d5d5a32bd299e876e25df83 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Thu, 12 Jan 2017 13:58:47 -0500 Subject: [PATCH 9/9] [#3390] datastore json dump: use fields, records --- ckanext/datastore/controller.py | 12 +++++------ ckanext/datastore/writer.py | 36 ++++++++++++++++----------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/ckanext/datastore/controller.py b/ckanext/datastore/controller.py index e13eacab3b9..a2bd391a6ea 100644 --- a/ckanext/datastore/controller.py +++ b/ckanext/datastore/controller.py @@ -42,15 +42,15 @@ def dump(self, resource_id): bom = 
boolean_validator(request.GET.get('bom'), {}) fmt = request.GET.get('format', 'csv') - def start_writer(columns): + def start_writer(fields): if fmt == 'csv': - return csv_writer(response, columns, resource_id, bom) + return csv_writer(response, fields, resource_id, bom) if fmt == 'tsv': - return tsv_writer(response, columns, resource_id, bom) + return tsv_writer(response, fields, resource_id, bom) if fmt == 'json': - return json_writer(response, columns, resource_id, bom) + return json_writer(response, fields, resource_id, bom) if fmt == 'xml': - return xml_writer(response, columns, resource_id, bom) + return xml_writer(response, fields, resource_id, bom) abort(400, _( u'format: must be one of %s') % u', '.join(DUMP_FORMATS)) @@ -69,7 +69,7 @@ def result_page(offset, limit): result = result_page(offset, limit) columns = [x['id'] for x in result['fields']] - with start_writer(columns) as wr: + with start_writer(result['fields']) as wr: while True: if limit is not None and limit <= 0: break diff --git a/ckanext/datastore/writer.py b/ckanext/datastore/writer.py index 0860bf36ac3..62dc0b66619 100644 --- a/ckanext/datastore/writer.py +++ b/ckanext/datastore/writer.py @@ -11,16 +11,16 @@ @contextmanager -def csv_writer(response, columns, name=None, bom=False): +def csv_writer(response, fields, name=None, bom=False): u'''Context manager for writing UTF-8 CSV data to response :param response: file-like or response-like object for writing data and headers (response-like objects only) - :param columns: list of column names + :param fields: list of datastore fields :param name: file name (for headers, response-like objects only) :param bom: True to include a UTF-8 BOM at the start of the file - >>> with csv_writer(response, columns) as d: + >>> with csv_writer(response, fields) as d: >>> d.writerow(row1) >>> d.writerow(row2) ''' @@ -34,21 +34,21 @@ def csv_writer(response, columns, name=None, bom=False): wr = unicodecsv.writer(response, encoding=u'utf-8') if bom: response.write(UTF8_BOM) - wr.writerow(columns) + wr.writerow(f['id'] for f in fields) yield wr @contextmanager -def tsv_writer(response, columns, name=None, bom=False): +def tsv_writer(response, fields, name=None, bom=False): u'''Context manager for writing UTF-8 TSV data to response :param response: file-like or response-like object for writing data and headers (response-like objects only) - :param columns: list of column names + :param fields: list of datastore fields :param name: file name (for headers, response-like objects only) :param bom: True to include a UTF-8 BOM at the start of the file - >>> with tsv_writer(response, columns) as d: + >>> with tsv_writer(response, fields) as d: >>> d.writerow(row1) >>> d.writerow(row2) ''' @@ -64,21 +64,21 @@ def tsv_writer(response, columns, name=None, bom=False): response, encoding=u'utf-8', dialect=unicodecsv.excel_tab) if bom: response.write(UTF8_BOM) - wr.writerow(columns) + wr.writerow(f['id'] for f in fields) yield wr @contextmanager -def json_writer(response, columns, name=None, bom=False): +def json_writer(response, fields, name=None, bom=False): u'''Context manager for writing UTF-8 JSON data to response :param response: file-like or response-like object for writing data and headers (response-like objects only) - :param columns: list of column names + :param fields: list of datastore fields :param name: file name (for headers, response-like objects only) :param bom: True to include a UTF-8 BOM at the start of the file - >>> with json_writer(response, columns) as d: + >>> with 
json_writer(response, fields) as d: >>> d.writerow(row1) >>> d.writerow(row2) ''' @@ -93,9 +93,9 @@ def json_writer(response, columns, name=None, bom=False): if bom: response.write(UTF8_BOM) response.write( - b'{\n "columns": %s,\n "data": [' % json.dumps( - columns, ensure_ascii=False, separators=(u',', u':'))) - yield JSONWriter(response, columns) + b'{\n "fields": %s,\n "records": [' % json.dumps( + fields, ensure_ascii=False, separators=(u',', u':'))) + yield JSONWriter(response, [f['id'] for f in fields]) response.write(b'\n]}\n') @@ -119,16 +119,16 @@ def writerow(self, row): @contextmanager -def xml_writer(response, columns, name=None, bom=False): +def xml_writer(response, fields, name=None, bom=False): u'''Context manager for writing UTF-8 XML data to response :param response: file-like or response-like object for writing data and headers (response-like objects only) - :param columns: list of column names + :param fields: list of datastore fields :param name: file name (for headers, response-like objects only) :param bom: True to include a UTF-8 BOM at the start of the file - >>> with xml_writer(response, columns) as d: + >>> with xml_writer(response, fields) as d: >>> d.writerow(row1) >>> d.writerow(row2) ''' @@ -143,7 +143,7 @@ def xml_writer(response, columns, name=None, bom=False): if bom: response.write(UTF8_BOM) response.write(b'\n') - yield XMLWriter(response, columns) + yield XMLWriter(response, [f['id'] for f in fields]) response.write(b'\n')
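
Note on usage (illustrative, not part of the patch series above): the writer context managers added in ckanext/datastore/writer.py accept any file-like object, not just a pylons response, since the header-setting branch only runs when the target has a ``headers`` attribute. Below is a minimal sketch, assuming Python 2 with the unicodecsv dependency installed and the final PATCH 9/9 signatures (each writer takes a list of datastore field dicts); the field names and row values are invented for the example:

    # Illustrative sketch only -- not part of the patches above.
    # Assumes Python 2 and the unicodecsv package used by writer.py.
    import io

    from ckanext.datastore.writer import csv_writer

    fields = [{u'id': u'_id'}, {u'id': u'name'}]   # made-up field list
    rows = [[1, u'Portland'], [2, u'Montréal']]    # made-up records

    out = io.BytesIO()   # no .headers attribute, so no HTTP headers are set
    with csv_writer(out, fields, bom=True) as wr:
        for row in rows:
            wr.writerow(row)

    # out.getvalue() starts with the UTF-8 BOM, then the '_id,name'
    # header row, then one CSV line per record.

The same pattern applies to tsv_writer, json_writer and xml_writer; over HTTP the equivalent output comes from the dump controller, e.g. ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=tsv&bom=true`` as documented in doc/maintaining/datastore.rst.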