Skip to content

Commit

Permalink
[ckan#3390] datastore dump format=xml
Browse files Browse the repository at this point in the history
  • Loading branch information
wardi committed Jan 6, 2017
1 parent 45acad3 commit e5605d9
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 4 deletions.
5 changes: 4 additions & 1 deletion ckanext/datastore/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@
csv_writer,
tsv_writer,
json_writer,
xml_writer,
)

int_validator = get_validator('int_validator')
boolean_validator = get_validator('boolean_validator')

DUMP_FORMATS = 'csv', 'tsv', 'json'
DUMP_FORMATS = 'csv', 'tsv', 'json', 'xml'
PAGINATE_BY = 10000


Expand All @@ -48,6 +49,8 @@ def start_writer(columns):
return tsv_writer(response, columns, resource_id, bom)
if fmt == 'json':
return json_writer(response, columns, resource_id, bom)
if fmt == 'xml':
return xml_writer(response, columns, resource_id, bom)
abort(400, _(
u'format: must be one of %s') % u', '.join(DUMP_FORMATS))

Expand Down
52 changes: 52 additions & 0 deletions ckanext/datastore/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from contextlib import contextmanager
from email.utils import encode_rfc2231
import json
from xml.etree.cElementTree import Element, SubElement, ElementTree

import unicodecsv

Expand Down Expand Up @@ -113,3 +114,54 @@ def writerow(self, row):
ensure_ascii=False,
separators=(u',', u':'),
sort_keys=True).encode(u'utf-8'))


@contextmanager
def xml_writer(response, columns, name=None, bom=False):
    u'''Context manager for writing UTF-8 XML data to response

    Writes an opening ``<data>`` root element, yields an
    :class:`XMLWriter` for emitting one ``<row>`` element per record, and
    writes the closing ``</data>`` tag when the ``with`` block finishes
    normally. If the ``with`` block raises, the closing tag is not
    written.

    The root element declares the ``xsi`` namespace prefix because rows
    mark NULL values with ``xsi:nil="true"``; without the declaration the
    prefix is unbound and namespace-aware XML parsers reject the document.

    :param response: file-like or response-like object for writing
        data and headers (response-like objects only)
    :param columns: list of column names
    :param name: file name (for headers, response-like objects only)
    :param bom: True to include a UTF-8 BOM at the start of the file

    >>> with xml_writer(response, fields) as d:
    ...     d.writerow(row1)
    ...     d.writerow(row2)
    '''

    # Plain file-like objects have no ``headers`` attribute; only
    # response-like objects get the content type / download file name.
    if hasattr(response, u'headers'):
        response.headers['Content-Type'] = (
            b'text/xml; charset=utf-8')
        if name:
            response.headers['Content-disposition'] = (
                b'attachment; filename="{name}.xml"'.format(
                    name=encode_rfc2231(name)))
    if bom:
        response.write(UTF8_BOM)
    # Bind the ``xsi`` prefix once on the root so every ``xsi:nil``
    # attribute inside the rows resolves to the XML Schema instance
    # namespace.
    response.write(
        b'<data xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">\n')
    yield XMLWriter(response, columns)
    response.write(b'</data>\n')


class XMLWriter(object):
    u'''Write rows to a response as individual ``<row>`` XML elements

    Each row becomes one ``<row>`` element followed by a newline. Every
    column is a child element named after the column and containing the
    text form of the value. ``None`` values are written as empty elements
    carrying ``xsi:nil="true"``. The ``xsi`` prefix is emitted as-is and is
    not declared on the ``<row>`` element itself.

    When the first column is ``_id`` its value is written as the ``_id``
    attribute of ``<row>`` instead of as a child element.

    :param response: object with a ``write`` method accepting bytes
    :param columns: list of column names, in the same order as the values
        passed to :meth:`writerow`
    '''

    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3, so
    # value conversion does not depend on the Python-2-only builtin name.
    _text_type = type(u'')

    def __init__(self, response, columns):
        self.response = response
        # Guard against an empty column list: there is no first column to
        # inspect, and indexing it would raise IndexError.
        self.id_col = bool(columns) and columns[0] == u'_id'
        if self.id_col:
            columns = columns[1:]
        self.columns = columns

    def writerow(self, row):
        u'''Serialize one row as a ``<row>`` element and write it out

        :param row: sequence of values ordered like the ``columns`` given
            to the constructor (including the leading ``_id`` value when
            the first column is ``_id``)
        '''
        root = Element(u'row')
        if self.id_col:
            root.attrib[u'_id'] = self._text_type(row[0])
            row = row[1:]
        for k, v in zip(self.columns, row):
            child = SubElement(root, k)
            if v is None:
                # NULL is distinguished from an empty string by xsi:nil
                child.attrib[u'xsi:nil'] = u'true'
            else:
                child.text = self._text_type(v)
        ElementTree(root).write(self.response, encoding=u'utf-8')
        self.response.write(b'\n')
7 changes: 4 additions & 3 deletions doc/maintaining/datastore.rst
Original file line number Diff line number Diff line change
Expand Up @@ -282,9 +282,10 @@ A DataStore resource can be downloaded in the `CSV`_ file format from ``{CKAN-UR

For an Excel-compatible CSV file use ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?bom=true``.

Other formats are also supported. For tab-separated values use
``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=tsv`` and for JSON use
``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=json``.
Other supported formats include tab-separated values (``?format=tsv``),
JSON (``?format=json``) and XML (``?format=xml``). For example, to download an
Excel-compatible tab-separated file use
``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=tsv&bom=true``.

.. _CSV: https://en.wikipedia.org/wiki/Comma-separated_values

Expand Down

0 comments on commit e5605d9

Please sign in to comment.