 {% endblock %}
+
+{% block main_content %}
+  {% block form %}
+    {{ _('Are you sure you want to delete the DataStore and Data Dictionary?') }}
+    {{ h.csrf_input() if 'csrf_input' in h }}
+  {% endblock %}
+
+{% endblock %}
diff --git a/ckanext/xloader/templates/xloader/resource_data.html b/ckanext/xloader/templates/xloader/resource_data.html
index f533d1e2..98027508 100644
--- a/ckanext/xloader/templates/xloader/resource_data.html
+++ b/ckanext/xloader/templates/xloader/resource_data.html
@@ -4,17 +4,36 @@
 {% block primary_content_inner %}
-  {% set action = h.url_for('xloader.resource_data', id=pkg.name, resource_id=res.id) %}
   {% set show_table = true %}
   {% block upload_ds_button %}
-
+    {% set action = h.url_for('xloader.resource_data', id=pkg.name, resource_id=res.id) %}
+
+      {{ h.csrf_input() if 'csrf_input' in h }}
   {% endblock %}
+
+  {% block delete_ds_button %}
+    {% if res.datastore_active %}
+      {% set delete_action = h.url_for('xloader.delete_datastore_table', id=pkg.id, resource_id=res.id) %}
+
+        {{ h.csrf_input() if 'csrf_input' in h }}
+        {% block delete_datastore_button_text %}{{ _('Delete from DataStore') }}{% endblock %}
+
+    {% endif %}
+  {% endblock %}
+
   {% if status.error and status.error.message %}
     {% set show_table = false %}
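(The new `delete_ds_button` block above posts to the `xloader.delete_datastore_table` route, which first renders the `confirm_datastore_delete.html` page added at the top of this diff. For orientation, a hedged sketch of what a view behind such a route can look like, assuming a Flask blueprint and CKAN's core `datastore_delete` action — the URL path, template context and redirect here are illustrative, not this PR's exact code:)

```python
# Hypothetical sketch of a confirm-then-delete view for the form above.
from flask import Blueprint, redirect, render_template, request

import ckan.plugins.toolkit as tk

xloader = Blueprint("xloader", __name__)


@xloader.route(
    "/dataset/<id>/resource/<resource_id>/delete-datastore",  # assumed path
    methods=["GET", "POST"],
)
def delete_datastore_table(id, resource_id):
    if request.method == "GET":
        # GET renders the confirmation page added at the top of this diff.
        return render_template(
            "xloader/confirm_datastore_delete.html",
            package_name=id,
            resource_id=resource_id,
        )
    # POST (carrying the token from h.csrf_input()) drops the DataStore
    # table, which also removes the Data Dictionary stored with it.
    tk.get_action("datastore_delete")(
        {}, {"resource_id": resource_id, "force": True}
    )
    return redirect(
        tk.h.url_for("xloader.resource_data", id=id, resource_id=resource_id)
    )
```

(Keeping the destructive step POST-only, with `{{ h.csrf_input() if 'csrf_input' in h }}` in the form, lets the same template work on CKAN versions with and without the CSRF helper.)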
@@ -22,10 +41,7 @@
   {% elif status.task_info and status.task_info.error %}
-      {% if status.task_info.error is string %}
-        {# DataPusher < 0.0.3 #}
-        {{ _('Error:') }} {{ status.task_info.error }}
-      {% elif status.task_info.error is mapping %}
+      {% if status.task_info.error is mapping %}
         {{ _('Error:') }} {{ status.task_info.error.message }}
         {% for error_key, error_value in status.task_info.error.items() %}
           {% if error_key != "message" and error_value %}
@@ -63,7 +79,25 @@
   {% if status.status and status.task_info and show_table %}

     {{ _('Upload Log') }}
-      {% for item in status.task_info.logs %}
+      {% set items = status.task_info.logs %}
+      {% set rows = rows or 50 %}
+      {% set skipped_rows = (items | length) - (rows * 2) %}
+      {% if skipped_rows > 1 %}
+
+        {{ skipped_rows }} out of {{ items | length }} logs will be hidden.
+        Show more   Show all
+
+      {% endif %}
+      {% for item in items %}
+        {# Truncate very long loops, showing just the start and end #}
+        {% if loop.index <= rows or loop.revindex <= rows
+          or (loop.index == rows + 1 and loop.revindex == rows + 1) %}
        {% set icon = 'ok' if item.level == 'INFO' else 'exclamation' %}
        {% set class = ' failure' if icon == 'exclamation' else ' success' %}
        {% set popover_content = 'test' %}
@@ -79,6 +113,18 @@
     {{ _('Upload Log') }}
+        {% elif loop.index == rows + 1 %}
+
+          Skipping {{ skipped_rows }} logs...
+          Show more   Show all
+
+        {% endif %}
      {% endfor %}
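(The hunk above caps the rendered upload log at the first `rows` and last `rows` entries — 50 each by default — and collapses the middle behind a "Skipping N logs..." row. The same windowing logic restated as a small Python sketch, with illustrative names:)

```python
# Mirrors the template's truncation: show the first `rows` and last `rows`
# log entries; collapse the middle unless only one entry would be hidden.
def visible_log_indices(n_items, rows=50):
    skipped = n_items - rows * 2
    if skipped <= 1:
        # Matches the template's `skipped_rows > 1` guard plus the special
        # case where loop.index == rows + 1 == loop.revindex.
        return list(range(n_items))
    head = list(range(rows))                     # loop.index <= rows
    tail = list(range(n_items - rows, n_items))  # loop.revindex <= rows
    return head + tail                           # gap renders as "Skipping N logs..."


assert len(visible_log_indices(500)) == 100          # 400 out of 500 hidden
assert visible_log_indices(101) == list(range(101))  # skipped == 1: show all
```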
diff --git a/ckanext/xloader/tests/ckan_setup.py b/ckanext/xloader/tests/ckan_setup.py
index ae8bfb3e..ff43d74c 100644
--- a/ckanext/xloader/tests/ckan_setup.py
+++ b/ckanext/xloader/tests/ckan_setup.py
@@ -1,5 +1,5 @@
 try:
-    from ckan.tests.pytest_ckan.ckan_setup import *
+    from ckan.tests.pytest_ckan.ckan_setup import *  # noqa
 except ImportError:
     import pkg_resources
     from paste.deploy import loadapp
diff --git a/ckanext/xloader/tests/fixtures.py b/ckanext/xloader/tests/fixtures.py
index f43916ab..9a7ad37f 100644
--- a/ckanext/xloader/tests/fixtures.py
+++ b/ckanext/xloader/tests/fixtures.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
-import sqlalchemy
-import sqlalchemy.orm as orm
+from sqlalchemy import orm
 import os
 from ckanext.datastore.tests import helpers as datastore_helpers
@@ -11,7 +10,7 @@
 )
 try:
-    from ckan.tests.pytest_ckan.fixtures import *
+    from ckan.tests.pytest_ckan.fixtures import *  # noqa
 except ImportError:
     import pytest
diff --git a/ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv b/ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv
new file mode 100644
index 00000000..fe2988a3
--- /dev/null
+++ b/ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv
@@ -0,0 +1,7 @@
+Funding agency,Program title,Maximum (indicative) grant amount,Some mixed integers
+DTIS,Accessible Tourism Infrastructure Grants,Five hundred thousand dollars,1
+DTIS,Boosting Accessible Tourism Experiences Grants,5000,-1
+DTIS,Some Other Grants,5 hundred thousand,0
+DTIS,Some Other Grants,$5000,6
+DTIS,Some Other Grants,$5 hundred thousand,
+DTIS,Some Other Grants,"$5,000.00","5"
diff --git a/ckanext/xloader/tests/samples/non_timestamp_sample.csv b/ckanext/xloader/tests/samples/non_timestamp_sample.csv
new file mode 100644
index 00000000..d1b39e90
--- /dev/null
+++ b/ckanext/xloader/tests/samples/non_timestamp_sample.csv
@@ -0,0 +1,4 @@
+Title,Postal postcode,Latitude,Longitude,Mon am,Mon pm,Last updated
+Adavale,4474,-25.9092582,144.5975769,8:00,16:00,19/07/2018
+Aramac,4726,-22.971298,145.241481,9:00-13:00,14:00-16:45,17/07/2018
+Barcaldine,4725,-23.55327901,145.289156,9:00-12:30,13:30-16:30,20/07/2018
diff --git a/ckanext/xloader/tests/samples/non_utf8_sample.csv b/ckanext/xloader/tests/samples/non_utf8_sample.csv
new file mode 100644
index 00000000..334c1005
--- /dev/null
+++ b/ckanext/xloader/tests/samples/non_utf8_sample.csv
@@ -0,0 +1,267 @@
+"ClientId_ActNo","Owner","Amount","SenderName","DateRec","PCode"
+"206681442213","MS MARIE LOUISE SEXTON ","477.05","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","3206"
+"206681442214","MR DAVID SHEARER","3.79","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","2213"
+"206681442215","MRS M SHONK + MR E T SHONK ","10.3","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","2093"
+"206681442216","MS AGATHA SKOURTIS","108.42","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","3025"
+"206681442217","MR JAMES SMITH","108.42","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","4811"
+"206681442218","MRS JILLIAN MELINDA SMITH","602.27","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","2752"
+"206681442219","MISS JESSICA SARAH STEAD","174.01","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","2040"
+"206681442220","MISS CHAU DONG MINH TANG","542.1","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","3065"
+"206681442221","MR TROY TAYLOR","240.69","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","4000"
+"206681442222","MR ANDREW PHILIP THOMPSON","2.17","VIRGIN
AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","2204" +"206681442223","MR IVAN CONRAD TIMBS","702.02","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","2612" +"206681442224","MR J WAJNTRAUB + MRS S WAJNTRAUB ","542.1","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","3205" +"206681442225","MR HOWARD GRENVILLE WEBBER","400.61","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","4556" +"206681442226","JANI ILARI KALLA","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","6157" +"206681442227","GARY JOHN & DESLEY L CAHILL","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4816" +"206681442228","CARMEL ANASTASIA MEAGLIA","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2205" +"206681442229","ASHLEY & ANNIE BRUGGEMANN","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4671" +"206681442230","TERRY & MARY RITCHIE","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4069" +"206681442231","BODY CORPORATE VILLAGE WAY CTS 19459","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4214" +"206681442232","MATHEW JOHN SHORTLAND","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2573" +"206681442233","TANYA MARIE TOWNSON","10.01","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4814" +"206681442234","VENEE ELVA RUSSELL","10.02","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4035" +"206681442235","ELIZABETH FERNANCE","10.03","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4223" +"206681442236","CHARLES JOHN & OLWYN MARTIN","10.04","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4121" +"206681442237","ALFRED BRETT SEILER","10.05","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4129" +"206681442238","LOUISE WOODHAM & NATHAN FREY","10.07","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4400" +"206681442239","MITRA KHAKBAZ","10.09","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4005" +"206681442240","ALLAN EDWARD KILCULLEN","10.1","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4817" +"206681442241","BEVAN JOHN LISTON","10.11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350" +"206681442242","KRIS MICHAEL KANKAHAINEN","10.11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4107" +"206681442243","MICHAEL LYNN","10.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4860" +"206681442244","ALAN RAYMOND & GERAL BURKITT","10.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4228" +"206681442245","JENNIFER & NEVILLE MARXSEN","10.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4680" +"206681442246","DARREN MAIN GRANT & LISA MARIE GROSSKOPF","10.2","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4504" +"206681442247","PEARSON AUTOMOTIVE","10.23","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4064" +"206681442248","MR SHANE HOPE & MISS YVONNE HILTON","10.24","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4173" +"206681442249","CARMEL LESLEY NEILSON & WAYNE MERVYN NEILSON &","10.24","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4650" +"206681442250","STEPHEN KENNETH ROBERTSON","10.24","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4740" +"206681442251","SHIH CHE LIN","10.26","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4214" +"206681442252","DAVID BRETT BROWNE","10.29","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4558" +"206681442253","NEVILLE COLIN WOODHOUSE","10.32","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4814" +"206681442254","DARRYN GREGORY & PET ROBIN","10.34","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4178" +"206681442255","DUDLEY 
JESSER","10.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4814" +"206681442256","MURRAY JOHN & SANDRA DIXON","10.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4870" +"206681442257","SHATHISO JOHNSON BAREKI","10.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4215" +"206681442258","ARTHUR EDWARD & MAUR MACDONALD","10.39","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4390" +"206681442259","GARY GOLDBERG","10.4","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2480" +"206681442260","PHUONG VAN NGO","10.41","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4508" +"206681442261","JACQUELYN WILSON","10.42","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","3046" +"206681442262","GARTH TURTON","10.42","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4051" +"206681442263","DAVID JAMES & ANNE M O'ROURKE","10.43","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4701" +"206681442264","ROBERT RUSSELL & VER MCKENZIE","10.45","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4503" +"206681442265","ESTATE OF DULCIE L SYKES","10.48","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4215" +"206681442266","LEESA GAYE OSMOND","10.51","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4671" +"206681442267","DAVID JOHN & ROSEMAR GILES","10.54","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4303" +"206681442268","SALLY & AQEEL AHMED","10.56","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350" +"206681442269","JUDITH MARJORY BURGESS","10.59","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","3101" +"206681442270","TROY ANTONY EWART","10.61","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4301" +"206681442271","RODULFO MANOY & GEORGE HAJEK","10.62","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4152" +"206681442272","GLEN DUNSTAN","10.66","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","3196" +"206681442273","ANNE RALSTON WRIGHT","10.73","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4825" +"206681442274","ALAN & NICOLE MAREE JACKSON","10.74","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4720" +"206681442275","DANIEL MALCOLM BROWN","10.81","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4501" +"206681442276","JENNIFER DEMERAL","10.82","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4214" +"206681442277","DARREN & LISA GARRETT","10.83","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4165" +"206681442278","LORRAINE & PETER JACKSON","10.84","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4740" +"206681442279","CHERYL MADELINE CAMPBELL","10.86","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4824" +"206681442280","OLAF PETER PRILL","10.89","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4305" +"206681442281","AJAY GIDH","10.9","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4051" +"206681442282","DEBRA JOANNE PRINDABLE","10.9","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4178" +"206681442283","MATTHEW WILLIAM CLARKE","10.96","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2914" +"206681442284","MARK STANLEY MCKENZIE","11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4207" +"206681442285","TREVOR & JANICE GARWOOD","11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4610" +"206681442286","LISA ANNE BRATINA","11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4228" +"206681442287","MICHAEL GEORGE KIRKWOOD","11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4561" +"206681442288","STEPHAN & JULIE BAWDEN","11.04","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4114" +"206681442289","PETER JOHN 
BOURKE","11.04","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4118" +"206681442290","TYRONE PAGE & ULRIKE","11.07","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4301" +"206681442291","SIMON ROBERT GRAY","11.08","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4006" +"206681442292","ALLAN NICHOLAS SCHWARZROCK","11.12","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4650" +"206681442293","IVAN J BLAKE & JAINE RIGTER","11.12","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4220" +"206681442294","DAVID MATTHEW REGINA CHRISTIE","11.12","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4151" +"206681442295","GEOFFREY WAYNE & EVAN GRIGG","11.14","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4720" +"206681442296","KYLIE JANELLE HARDCASTLE","11.14","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4013" +"206681442297","PAMELA ANN WELLER","11.15","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4655" +"206681442298","JASON PATRICK & ELIZ MURPHY","11.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4511" +"206681442299","MLADEN & VESNA SAJKO","11.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4157" +"206681442300","DEAN STEPHEN BROCKENSHIRE","11.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2571" +"206681442301","LISA CHRISTOBEL BOWKER","11.22","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4066" +"206681442302","MATTHEW RAY EBBAGE","11.24","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4101" +"206681442303","BRIAN & GEORGINA WHITLEY","11.25","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4521" +"206681442304","HAYLEY WESTON","11.25","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4159" +"206681442305","JAMES PATRICK HOCKING","11.28","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4127" +"206681442306","ROBERT ANDREW & SARA BROWNHALL","11.29","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4069" +"206681442307","EDWARD JAMES DODGSON","11.3","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4069" +"206681442308","MELISSA JOY DODD","11.32","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4069" +"206681442309","JOSHUA CALVIN BEGENT","11.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4306" +"206681442311","DORATHY AMANDA WALTERS","11.4","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4744" +"206681442312","RICHARD ROBERTS & KYM RALEIGH","11.4","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4053" +"206681442313","SAMARA INSOLL","11.48","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4212" +"206681442314","NEIL GREGORY FLESSER","11.49","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4800" +"206681442315","EUNICE GLADYS WILBRAHAM","11.51","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4570" +"206681442316","KARA NICOLE MCINNES","11.57","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4503" +"206681442317","DAVID BLYTH","11.58","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4078" +"206681442318","KEVIN & MARION KEIR","11.58","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4216" +"206681442319","FRANCES & CHARLES KEEBLE","11.59","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4500" +"206681442320","LYNETTE ANNE & PETER NISSEN","11.6","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4069" +"206681442321","DANIEL PETER JOHNSON","11.61","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4051" +"206681442322","ALLAN & EUNICE DELLAWAY","11.62","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4122" +"206681442323","CHRISTOPHER JOHN BEEM","11.63","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4101" 
+"206681442324","DAVID JAMES & KELLIE POULTON","11.64","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350" +"206681442325","MAVIS CAROLIN SCOTT","11.64","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4018" +"206681442326","REEGAN & ADAM MARTIN","11.68","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2153" +"206681442327","DENYSE B BONNEY","11.7","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4811" +"206681442328","JAMES ANDERSON","11.71","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4220" +"206681442329","SUSANNAH PINTER","11.72","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4744" +"206681442330","BRENTON MARK & KAREN GARNETT","11.78","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4306" +"206681442331","PL CAMELOT VENTURES AS TRUSTEE FOR K F T TRUST NO","11.82","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4215" +"206681442332","RON HENRY SCHMIDT","11.84","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","830" +"206681442333","ROSS COCKBURN & AUDREY KILL","11.86","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4010" +"206681442334","BENJAMIN CLARK","11.88","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4306" +"206681442335","IRIS LEAH TERESA BAKER","11.9","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2170" +"206681442336","MARK JOHN DEEBLE","11.94","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4740" +"206681442337","CHRISTINE & BARRY RIGBY","11.94","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2485" +"206681442338","NATASHA ANN WOODWARD","11.97","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4869" +"206681442339","BENJAMIN JOHN CANSDALE","11.98","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4064" +"206681442340","PETER HERALD","11.98","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4184" +"206681442341","SIMON CUSHWAY","11.99","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4154" +"206681442342","ANTHONY & MICHELLE JOHNSTON","12","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4551" +"206681442343","PAUL HAUCK","12.03","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4000" +"206681442344","RONALD ALBERT & PEAR NORTHILL","12.03","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4413" +"206681442345","ROBYN ELLEN SOMERS","12.03","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4178" +"206681442346","ROSE ANN HODGMAN","12.06","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4068" +"206681442347","JOHN & MARDI BOLTON","12.09","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4165" +"206681442348","KRYSTYNA RENNIE","12.09","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4053" +"206681442349","JOANNE BARSBY","12.12","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350" +"206681442350","BRENDAN JAMES FELSCHOW","12.14","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4508" +"206681442351","MARTIN WILLIAM HARRISON","12.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4870" +"206681442352","PATRICK HEINEMANN","12.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4870" +"206681442353","ELEKRA & SPENCER RORIE","12.17","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4211" +"206681442354","ROBERT CLIVE & NOELE CROCKER","12.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4211" +"206681442355","DANIEL JOSEPH & DAVI CARMICHAEL","12.21","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4160" +"206681442356","WENBO JIANG & XIU FAN CHEN","12.24","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4303" +"206681442357","NOEL JEFFREY BRADY","12.27","SUNCORP GENERAL INSURANCE","2012-03-12 
00:00:00","4550" +"206681442358","DARREN RICHARD GOSSNER & MATTHEW JOHN ANDERSON","12.29","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4500" +"206681442359","STEPHEN MICHAEL & MA JOLLY","12.3","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350" +"206681442360","SHONA & ARCHIE WALLACE","12.34","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4504" +"206681442361","ZOFIA HYS","12.34","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4300" +"206681442362","PIROSKA KING","12.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4154" +"206681442363","ARVIN CHAND & AMITA MOHINI","12.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4503" +"206681442364","WIETSKE GERARDINA & GAUNT","12.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4309" +"206681442365","MARK REGINALD MATTHEWS","12.39","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4811" +"206681442366","SHARP ARLEEN & CLINTON","12.4","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","6020" +"206681442367","EMOKE & LASZLO & MAR ZSOLDOS","12.41","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4005" +"206681442368","MARK & KARON KELLER","12.42","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4702" +"206681442369","JODIE KATRINA & TONY MCLACHLAN","12.43","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350" +"206681442370","ALAN WARWICK & LINDA LEWIS","12.45","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4670" +"206681442371","ADRIAN WAYNE LORRAWAY","12.5","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4702" +"206681442372","NICHOLE KRISTY MIKLOS","12.53","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4152" +"206681442373","NATASHA LEANNE HAYES","12.54","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4017" +"206681442374","KAREN LEE & DARREN J SHEEHAN","12.55","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4516" +"206681442375","RACHAEL MAY COLLINS-COOK","12.58","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4211" +"206681442376","TAMARA JUNE WEIGHT & SUSANNE ELIZABETH DEVINE","12.59","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4814" +"206681442377","RODNEY GATES","12.59","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","7015" +"206681442378","REBECCA & LEE-ANNE SMITH","12.61","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","830" +"206681442379","ADAM WILLIAM JOHNSON","12.62","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4069" +"206681442380","ZAC ASHLEY & ALEXAND MORGAN","12.63","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4165" +"206681442381","HILARY SEALY","12.64","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4211" +"206681442382","NAOMI JOHNSTONE & SCOTT LENAN","12.68","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4207" +"206681442383","WAYNE FLICKER","12.7","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2756" +"206681442384","BRENDA ANDERSON","12.71","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4811" +"206681442385","MATTHEW JAMES ALLEN","12.71","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4002" +"206681442386","MARIA-THERESIA ALTENHOFEN-CROSS & JOHN ERI CROSS","12.72","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4570" +"206681442387","MELODIE ZYLSTRA","12.72","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4570" +"206681442388","AMANDA & GRAHAM SWALLOW","12.75","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4720" +"206681442389","GRAEME ROBERT & ROBI DOHERTY","12.75","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4214" +"206681442390","GILLIAN LEIGH O'SULLIVAN","12.79","SUNCORP GENERAL INSURANCE","2012-03-12 
00:00:00","4817" +"206681442391","JULIA MELLICK","12.84","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4178" +"206681442392","TOLISIALE & HAMAKO MAHINA","12.87","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4300" +"206681442393","SIMON JOHN STEVENS","12.89","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4305" +"206681442394","MICHAEL ANTHONY & DE SNELSON","12.89","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4817" +"206681442395","QUERIDA JO LOFTES","12.89","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4812" +"206681442396","LORRAINE VICTORIA DIAS","12.89","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4000" +"206681442397","JOHN MICHAEL TRAVIS LINLEY","12.92","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4051" +"206681442398","CAROLINE HENDRY & RICHARD HOPKINS","12.93","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4067" +"206681442399","JOSH EAGLE","12.95","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4030" +"206681442400","MARK SHAWN FROST & BELINDA JEAN MARSHALL","12.95","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4221" +"206681442401","BRENT & GABRIELLE ANTHONY","12.96","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4213" +"206681442402","RICHARD SADLER","12.98","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4065" +"206681442403","GROVE FRUIT JUICE PTY LTD","13","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4122" +"206681442404","LEAH SPARKS","13","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4561" +"206681442405","JAMES MAURICE & PATR GORDON","13","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4870" +"206681442406","MARK JOSEPH SEARS","13","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4565" +"206681442407","SOPHIE VICTORIA STEWART & TREVOR MATTHEW ROWE","13","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4055" +"206681442408","BOBBY JAMES & SIMONE TAYLOR","13.02","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","6254" +"206681442409","PATRICK MICHAEL & ME REEVES","13.08","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4101" +"206681442410","MAURICE GROGNUZ","13.09","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4670" +"206681442411","ALAN PIGOTT & ALAN CONDER","13.11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2025" +"206681442412","SAMANTHA & CAMERON SCHELBACH","13.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4309" +"206681442413","SHERIDAN ANNE ST CLAIR","13.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4164" +"206681442414","ANDREW CHRISTIE","13.17","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4521" +"206681442415","MARK ANDREW & MELISS VINTON","13.17","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4508" +"206681442416","IRWIN DOUGLAS & MARI SORENSEN","13.2","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4305" +"206681442417","CARLY SUSAN BENNETTS","13.23","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4034" +"206681442418","RYAN THORNTON","13.24","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2560" +"206681442419","RICHARD BAILEY","13.26","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","3850" +"206681442420","DAVID IAN & EMILY RU PRYOR","13.27","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4120" +"206681442421","WILLIAM SINCLAIR","13.3","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4514" +"206681442422","CATHERINE LUCILLE VALENTINE & ROBERT WAREING","13.3","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4165" +"206681442423","RAYMOND JAMES JONES","13.3","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4170" 
+"206681442424","ANDREW STEWART T/A AWE COMMUNICATIONS","13.3","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4207" +"206681442425","TONY RONALD OSBOURNE","13.35","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4311" +"206681442426","MARK JOHN & LENY FIG O'HARA","13.35","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4825" +"206681442427","CECILIA ASHLEY & DAV BUTLER","13.35","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4816" +"206681442428","WILLIAM LEATHAM","13.36","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4170" +"206681442429","MAXWELL RAYMOND MATHERS & DENISE MAREE MELLARE","13.44","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4129" +"206681442430","RENE & JACQUELINE WASSERFUHR","13.44","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4556" +"206681442431","MICHAEL LEIGH KENNEDY","13.48","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4610" +"206681442432","MEDECO MEDICAL CENTRE BEENLEIGH","13.5","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4207" +"206681442433","GARY PAUL & GAYE SHELLEY","13.5","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4510" +"206681442434","STEVE & BRENDA GEIGER","13.53","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4740" +"206681442435","GREGORY BERNARD JAMES","13.53","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4051" +"206681442436","ROBBIE DEEBLE","13.56","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4740" +"206681442437","OWEN TRAYNOR","13.56","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","6076" +"206681442438","TONI MICHELLE & SHAN MORGAN","13.59","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4717" +"206681442439","NICOLAS VAN HORTON","13.59","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4220" +"206681442440","IAN BOWDEN","13.6","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4886" +"206681442441","QUEENSLAND COUNTRY CREDIT UNION - JIMBOOMBA","13.61","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4814" +"206681442442","ALANA FELLINGHAM","13.62","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4551" +"206681442443","ALLAN JOHN & CARMEL BETHEL","13.62","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4871" +"206681442444","PETER WILLIAM & ODET NORMAN","13.63","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350" +"206681442445","EMILY & MATTHEW PARSLOW","13.68","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4173" +"206681442446","JAMES OI YUEN GOCK","13.69","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2049" +"206681442447","JODIE ELIZABETH MORRISON","13.7","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4280" +"206681442448","BELINDA JANE HARNETT-PETERS & RANDALL NEI PETERS","13.74","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4017" +"206681442449","JULIEN & CHRISTIAN JUVIGNY","13.78","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4215" +"206681442450","SUSAN JOY MURRAY & THOMAS HOGAN","13.79","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4217" +"206681442451","PATRICK COLIN & HEAT HARRIS","13.8","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4506" +"206681442452","LINDY BOTHA","13.84","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4154" +"206681442453","PATRICIA LORETTA & D KNIGHT","13.85","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4650" +"206681442454","COWBURN CONSULTING PTY LTD","13.87","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4000" +"206681442455","SPENCER JAMES HAMILTON","13.9","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4300" +"206681442456","ANNA LOUISE ROSS","13.95","SUNCORP GENERAL 
INSURANCE","2012-03-12 00:00:00","4170" +"206681442457","JOHN HUGH & BOB SUTHERLAND","13.98","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4509" +"206681442458","ROBERTA MARY MACNEE","13.99","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4567" +"206681442459","MATTHEW CHRISTENSEN","14.03","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4152" +"206681442460","TROY & KIRSTY JEFFRIES","14.04","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4370" +"206681442461","WILLIAM GEORGE BALSDON","14.05","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4878" +"206681442462","JAIME LISA CAMPBELL & DANIEL BEVERIDGE","14.07","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4216" +"206681442463","NANCY JOHANNESSON","14.11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4505" +"206681442464","JOSHUA FRANK SEIDL","14.11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4122" +"206681442465","DAVID LESTER","14.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4817" +"206681442466","MATHIAS DONALD","14.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4103" +"206681442467","GLEN EVAN & HAYLEE L MARTIN","14.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350" +"206681442468","JOHN GORDON EVANS","14.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4814" +"206681442469","DIANA NOYCE & LAURENCE VIZER T/A","14.2","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4502" +"206681442470","GREIG MANLEY","14.22","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","3040" +"206681442471","BRENDON ANSELL","14.23","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4171" +"206681442472","CATHERINE A ROBERTSON & PAUL BROMILEY","14.27","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4064" +"206681442473","ADAM LEE & SAMANTHA RANKIN","14.28","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4132" +"206681442474","BERNICE BOYS","14.34","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4011" +"206681442475","HAYLEY MICHELLE BURROW","14.34","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2153" +"206681442476","SIONE FAUMUINA","14.42","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4815" +"206681442477","GERARD JARMAN","14.44","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","3337" +"206681442478","DOUGLAS CECIL GOOLEY","14.48","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2481" +"206681442479","ANTHONY AUGUSTO HENRIQUES T/A CAFÚ VILA FRANCA","14.5","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4020" diff --git a/ckanext/xloader/tests/samples/sample_with_blanks.csv b/ckanext/xloader/tests/samples/sample_with_blanks.csv new file mode 100644 index 00000000..b53b25db --- /dev/null +++ b/ckanext/xloader/tests/samples/sample_with_blanks.csv @@ -0,0 +1,4 @@ +Funding agency,Program title,Opening date,Service ID +DTIS,Visitor First Experiences Fund,23/03/2023,63039 +DTIS,First Nations Sport and Recreation Program Round 2,22/03/2023,63040 +,,,63041 diff --git a/ckanext/xloader/tests/samples/sample_with_empty_lines.csv b/ckanext/xloader/tests/samples/sample_with_empty_lines.csv new file mode 100644 index 00000000..abc8a0dc --- /dev/null +++ b/ckanext/xloader/tests/samples/sample_with_empty_lines.csv @@ -0,0 +1,10 @@ +date,temperature,place +2011-01-01,1,Galway +2011-01-02,-1,Galway +2011-01-03,0,Galway +2011-01-01,6,Berkeley + +,,Berkeley +2011-01-03,5, + + diff --git a/ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv b/ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv new file mode 100644 index 00000000..a9527cf7 --- /dev/null +++ 
b/ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv @@ -0,0 +1,136 @@ +Category,Category name,Priority,Initiative name,Investment objectives,Primary digital priority,Initiative stage,Actual start date,Approved end date,Date data current at,Percentage complete,Overall status,Project commencement allocation,Approved expenditure,Actual cost to date,Scope change event,Cost re-evaluation event,Delivery delay event,Project journey and reasons for variance,Learn more (URL) +DDSSHHESW,"Department of Defence, Social Security, Health, Housing, Education, and Silly Walks",High,Silly Walks project - Stage 2,"Lorum ipsum.",Collaboration,Delivery,01/07/1970,30/06/1971,31/03/1971,41,G,5633000,5739000,2352000,N,N,N,"As at 31 March 1971 +- Overall 'green' (on track) status +- Revised user journey following results of Silly Walk UX/UI testing +- Transition to support progressing with documentation and walk-through of the solution. +- Ongoing high levels of silly walk usage reflecting the success of search engine marketing. Silly walk focused campaign to further increase awareness and usage is being finalised. + +As at 28 February 1971 +- Overall 'green' (on track) status +- Results of Silly Walk UX/UI testing is guiding development of the revised user journey. +- Silly Walk transition to BAU support continuing with workshops, showcases and handover documentation. +- Silly Walk usage is increasing + +As at 31 January 1971 +- Continued amber status [closely monitored] with risks under management +- Search Engine Marketing -'Always On' yielding good results with continued increase in users and the proportion benefitting from Silly Walk +- Good progress on development of revised Silly Walk user journey. + +As at 31 December 1970 +Status AMBER [Closely monitored] +- Search Engine Marketing commenced 19 December 1970 and already showing increased users and proportion of customers benefitting from Silly Walk +- External assurance review completed and reported 'green' rating for confidence of delivery. + +As at 30 November 1970 +- Continued amber status pending risk management +- Marketing to commence to increase awareness of platform +- Good progress on development of revised user journey + +As at 31 October 1970 +Status AMBER [Closely monitored] +- Silly Walk Stage 2 continue reporting amber status reflective of ongoing high-level risks associated with demand-driven labour-market conditions and planned transition to support. +- Communications and engagement are in progress. +- The revised user journey continues development and testing. This is planned to be ready for release in the first quarter of 1971. As at 30 September 1970 +Status AMBER [Closely monitored] +Project journey events: +- A revised customer journey in line with outcomes of customer testing and retesting to validate solution usefulness continues to progress. +- Silly Walk industries expanded to include all industries. +- Engagement with agencies continues, to heighten Silly Walk awareness and complete validation following recent expansion to encompass all industries. + +As at 31 August 1970 +Status GREEN [On track] +The project is reporting green overall. Ongoing resourcing risk will continue to be monitored and managed for the life of the project, due to a tight labour market. +Project journey events: +- A revised customer journey in line with outcomes of customer testing and retesting to validate solution usefulness continues to progress. 
+- Further analysis of June/July 1970 marketing campaign has offered recommendations for consideration, to improve target audience awareness and Silly Walk uptake. +- Silly Walk industries expanded to include Retail Trade, Accommodation and Non-residential Construction industries finalised. +- Engagement with agencies continues, to heighten Silly Walk awareness and complete validation following recent expansion with three additional industries. + +As at 31 July 1970 +Status AMBER [Closely monitored] +The project is continuing to report amber overall mainly due to ongoing resourcing challenges. +Project journey events: +- A revised customer journey in line with outcomes of customer testing and retesting to validate solution usefulness, is progressing. +- Analysis of a major marketing campaign conducted in June/July 1970 showed a significant step-up in number of Silly Walk users. +- The target of 95% of Circus population coverage was met in June 1970 with 100% of Circus population now covered on Silly Walk. +- Agency engagement for extension industries has commenced. + +As at 1 July 1970 +Silly Walk commenced work on expanding industries to include Retail Trade, Accommodation and Non-residential Construction industries. + +As at June 1970 +Stage 2 of the project is commencing and will build up the solution delivered in Silly Walk Stage 1. Customer journey will be revised in line with outcome of customer testing. The increased coverage target of at least 95% of the Circus population was met in June 1970, with all local governments included on Silly Walk. Benefits realisation through marketing and promotion of Silly Walk.",https://example.com +DDSSHHESW,"Department of Defence, Social Security, Health, Housing, Education, and Silly Walks",High,Flying Circus Modernisation and Transformation Program - Tranche 1,"The Flying Circus Modernisation and Transformation (FCMT) Program seeks to reduce the risks associated with department legacy systems by delivering contemporary, consolidated, integrated, user-friendly applications to support delivery of Flying Circus outcomes. To optimise the technical capabilities of the new solutions, engagement with business teams in the review and development of business processes is a priority. ",Trust,Delivery,01/07/1969,31/08/1971,28/02/1971,52,G,8692200,9614968,4961147,Y,Y,Y,"As at 28 February 1971 +- Tranche 1 FCMT projects continue on schedule and on budget for Tranche 1 completion by 31 August 1971. +- Customer Engagement and Contract Establishment projects continue to progress focusing on delivery activities for new CRM and Portal enhancements. +- FCMT Tranche 2 Business Case tracking for completion April 1971. + +As at 31 January 1971 +- FCMT Projects continue to track to schedule and on budget for Tranche 1 completion 31 August 1971. +- Customer Engagement and Contract Establishment Projects progressing well with delivery activities for new CRM and Portal enhancements. + +As at 31 December 1970 +Status GREEN +- FCMT projects continuing to track to board endorsed updated schedule and on budget for Tranche 1 completion on 31 August 1971. +- Customer Engagement and Contract Establishment projects completed partner onboarding and delivery activities underway. +- Planning in progress for Tranche 2, focusing on remaining legacy systems for planned commencement at completion of Tranch 1. 
+ +As at 30 November 1970 +Status GREEN +- Tranche 1 delivery date extended to 31 August 1971 due to CRM vendor procurement delays and subsequent additional time requirements for build completion and testing of new CRM. +- All projects maintaining momentum and progressing to revised schedule within budget. + +As at 31 October 1970 +Status GREEN +-New 'Partner Portal' Digital Channel continues to perform well with 3516 registered, active, external users from 634 different organisations. Update release being planned for January 1971. +-SkillsCRM (CEP Project) delivery partner on-boarded and formal delivery stage commenced. +-Contract Establishment and Variation (CEV PRoject) continuing delivery partner select with a view to commencing prior to end of December 1970. + +As at 30 September 1970 Status GREEN. +The FCMT 'Partner Portal' solution was successfully launched on the 17 August 1970. The decommissioning of the outdated legacy application, 'WalkConnect', has completed. Work is now increasing on the next Flying Circus systems to be replaced, SkillsCRM (via the Customer Engagement Project) and Policy on Line (via the Contract Establishment and Variation Project). +Project Journey Events: +- Partner Portal. After the successful launch of Partner Portal and decommissioning of WalkConnect, the transition to BAU is underway with the Project team continuing to support business until BAU transition is completed. +- Data, Infrastructure and Reporting. +New 'Data Lake' infrastructure built. Data ingestion processes being trialled. QTS report requirement gathering underway which will showcase new capability once completed. Compliance tool SMCM successfully launched September 30. +-Customer Engagement Project (CEP). Completed assurance reviews successfully. Delivery partner selection completed. Partner and formal delivery stage due to start 18 October 1970. Ramp up of activities continuing with business demonstrations of CRM proof of concept. +-Contract Establishment and Variation (CEV). +Requirements gathering completed. Delivery partner selection process commenced. 'As is' process documentation underway. + +As at 31 August 1970 +Status GREEN. The project remains on track. Successful launch of new secure 'Partner Portal' Digital Channel for Flying Circus related organisations occurred 17 August 1970. + +Current Projects underway: +- Partner Portal. Go-live occurred on track 17 August 1970. All registered Flying Circus organisations now able to use the portal to access key applications and send information to DDSSHHESW via secure channel. Enhanced support being provided for 6 weeks. Legacy system decommissioning underway. +- Data, Infrastructure and Reporting. Build of initial Data Lake (centralised, quality, information source) continuing and requirement gathering of first report planned to use new capabilites commenced. +- Customer Services Hub (CRM). Implementation partner selection complete. Solution delivery activities due to start by end September 1970. +- Contract Engagement and Variation. Requirements gathering complete and partner selection process to commence by end September 1970. + +As at 31 July 1970 +Status GREEN + +Project journey events: +Implementation of next changes to FCMT applications remain on track for August 1970 with full launch of new secure Partner Portal Digital Channel for Flying Circus related organisations. +FCMT Program scope adjusted to include additional at risk system decommission activties during this financial year. 
Approved expenditure updated to align with revised scope. + +Current Projects underway +- Partner Portal. Opened for registrations 4 July 1970. Majority of Flying Circus related organisation now registered. Full access (go-live) on track to commence 17 August 1970. Legacy system to be disabled and decommissioned September 1970. +- Data, Infrastructure and Reporting. Build of initial Data Lake (centralised, quality, information source) underway with population and work on first report to commence in September. +- Customer Services Hub (CRM). Requirements confirmed and partner selection underway. Work on legacy CRM replacement due to start September/October 1970. +- Contract Engagement and Variation. Requirements gathering and new process design activities in progress. + +15 May 1970 Update +Status GREEN + +Implementation of next changes to Flying Circus applications on track for August 1970 with introduction of new secure 'Silly Portal' Digital Channel for Flying Circus related organisations. + +Projects Completed +-Database consolidation - key databases transitioned to supported versions and platforms. Completed November 1969. +-System to System Integration platform. Completed 9 May 1970. + +Current projects underway +-Partner Portal secure digital channel, in final testing. Pilot successfully complete and on track for release in August 1970. +Projects in startup +-Data, Infrastructure and Reporting, planning underway. +-Customer Services Hub (CRM), planning underway. +-Contract Engagement and Variation, planning underway. +-Planning continues for Tranche 2.",https://example.com diff --git a/ckanext/xloader/tests/samples/sample_with_quoted_commas.csv b/ckanext/xloader/tests/samples/sample_with_quoted_commas.csv new file mode 100644 index 00000000..7fe94e5b --- /dev/null +++ b/ckanext/xloader/tests/samples/sample_with_quoted_commas.csv @@ -0,0 +1,4 @@ +Funding agency,Program title,Opening date,Service ID +DTIS,"Department of Employment, Small Business and Training",23/03/2023,63039 +DTIS,"Foo, baz, meh",22/03/2023,63040 +,,,63041 diff --git a/ckanext/xloader/tests/samples/simple-large.csv b/ckanext/xloader/tests/samples/simple-large.csv index 53d3fb24..46c6c3b9 100644 --- a/ckanext/xloader/tests/samples/simple-large.csv +++ b/ckanext/xloader/tests/samples/simple-large.csv @@ -1,4 +1,5 @@ id,text +0,- 1,a 2,b 3,c @@ -49997,4 +49998,4 @@ id,text 49996,x 49997,y 49998,z -49999,a \ No newline at end of file +49999,a diff --git a/ckanext/xloader/tests/test_action.py b/ckanext/xloader/tests/test_action.py index 71f4ad01..8b0e2729 100644 --- a/ckanext/xloader/tests/test_action.py +++ b/ckanext/xloader/tests/test_action.py @@ -4,6 +4,7 @@ except ImportError: import mock +from ckan.plugins.toolkit import NotAuthorized from ckan.tests import helpers, factories from ckanext.xloader.utils import get_xloader_user_apitoken @@ -30,6 +31,25 @@ def test_submit(self): ) assert 1 == enqueue_mock.call_count + def test_submit_to_custom_queue_without_auth(self): + # check that xloader_submit doesn't allow regular users to change queues + user = factories.User() + with pytest.raises(NotAuthorized): + helpers.call_auth( + "xloader_submit", + context=dict(user=user["name"], model=None), + queue='foo', + ) + + def test_submit_to_custom_queue_as_sysadmin(self): + # check that xloader_submit allows sysadmins to change queues + user = factories.Sysadmin() + assert helpers.call_auth( + "xloader_submit", + context=dict(user=user["name"], model=None), + queue='foo', + ) is True + def test_duplicated_submits(self): def 
submit(res, user): return helpers.call_action( diff --git a/ckanext/xloader/tests/test_jobs.py b/ckanext/xloader/tests/test_jobs.py index 648a2451..e819dad9 100644 --- a/ckanext/xloader/tests/test_jobs.py +++ b/ckanext/xloader/tests/test_jobs.py @@ -1,740 +1,131 @@ -from __future__ import absolute_import -import os -import json -import random -import datetime -import time -import six - -try: - from collections import OrderedDict # from python 2.7 -except ImportError: - from sqlalchemy.util import OrderedDict import pytest +import io -from nose.tools import make_decorator -try: - from unittest import mock -except ImportError: - import mock -import responses -from sqlalchemy import MetaData, Table -from sqlalchemy.sql import select - -import ckan.plugins as p +from datetime import datetime -try: - config = p.toolkit.config -except AttributeError: - from pylons import config - -from ckanext.xloader import jobs -from ckanext.xloader import db as jobs_db -from ckanext.xloader.loader import get_write_engine +from requests import Response +from ckan.cli.cli import ckan +from ckan.plugins import toolkit from ckan.tests import helpers, factories -SOURCE_URL = "http://www.example.com/static/file" - - -def mock_actions(func): - """ - Decorator that mocks actions used by these tests - Based on ckan.test.helpers.mock_action - """ - - def wrapper(*args, **kwargs): - # Mock CKAN's resource_show API - from ckan.logic import get_action as original_get_action - - def side_effect(called_action_name): - if called_action_name == "resource_show": - - def mock_resource_show(context, data_dict): - return { - "id": data_dict["id"], - "name": "short name", - "url": SOURCE_URL, - "format": "", - "package_id": "test-pkg", - } - - return mock_resource_show - elif called_action_name == "package_show": - - def mock_package_show(context, data_dict): - return { - "id": data_dict["id"], - "name": "pkg-name", - } - - return mock_package_show - else: - return original_get_action(called_action_name) - - try: - with mock.patch( - "ckanext.xloader.jobs.get_action" - ) as mock_get_action: - mock_get_action.side_effect = side_effect - - return_value = func(*args, **kwargs) - finally: - pass - # Make sure to stop the mock, even with an exception - # mock_action.stop() - return return_value - - return make_decorator(func)(wrapper) - - -@pytest.mark.skip -@pytest.mark.usefixtures("with_plugins") -@pytest.mark.ckan_config("ckan.plugins", "datastore xloader") -class TestxloaderDataIntoDatastore(object): - - @pytest.fixture(autouse=True) - def setup_class(self): - self.host = "www.ckan.org" - self.api_key = "my-fake-key" - self.resource_id = "foo-bar-42" - factories.Resource(id=self.resource_id) - jobs_db.init(config, echo=False) - # drop test table - engine, conn = self.get_datastore_engine_and_connection() - conn.execute('DROP TABLE IF EXISTS "{}"'.format(self.resource_id)) - yield - if "_datastore" in dir(self): - connection = self._datastore[1] - connection.close() - - def register_urls( - self, filename="simple.csv", content_type="application/csv" - ): - """Mock some test URLs with responses. - - Mocks some URLs related to a data file and a CKAN resource that - contains the data file, including the URL of the data file itself and - the resource_show, resource_update and datastore_delete URLs. 
- - :returns: a 2-tuple containing the URL of the data file itself and the - resource_show URL for the resource that contains the data file - - """ - responses.add_passthru(config["solr_url"]) - - # A URL that just returns a static file - responses.add( - responses.GET, - SOURCE_URL, - body=get_sample_file(filename), - content_type=content_type, - ) - - # A URL that mocks the response that CKAN's resource_update API would - # give after successfully updating a resource. - resource_update_url = ( - "http://www.ckan.org/api/3/action/resource_update" - ) - responses.add( - responses.POST, - resource_update_url, - body=json.dumps({"success": True}), - content_type="application/json", - ) - - # A URL that mock's the response that CKAN's datastore plugin's - # datastore_delete API would give after successfully deleting a - # resource from the datastore. - datastore_del_url = "http://www.ckan.org/api/3/action/datastore_delete" - responses.add( - responses.POST, - datastore_del_url, - body=json.dumps({"success": True}), - content_type="application/json", - ) - - self.callback_url = "http://www.ckan.org/api/3/action/xloader_hook" - responses.add( - responses.POST, - self.callback_url, - body=json.dumps({"success": True}), - content_type="application/json", - ) - - @classmethod - def get_datastore_engine_and_connection(cls): - if "_datastore" not in dir(cls): - engine = get_write_engine() - conn = engine.connect() - cls._datastore = (engine, conn) - return cls._datastore - - def get_datastore_table(self): - engine, conn = self.get_datastore_engine_and_connection() - meta = MetaData(bind=engine) - table = Table( - self.resource_id, meta, autoload=True, autoload_with=engine - ) - s = select([table]) - with conn.begin(): - result = conn.execute(s) - return dict( - num_rows=result.rowcount, - headers=list(result.keys()), - header_dict=OrderedDict( - [(c.key, six.text_type(c.type)) for c in table.columns] - ), - rows=result.fetchall(), - ) - - def get_load_logs(self, task_id): - conn = jobs_db.ENGINE.connect() - logs = jobs_db.LOGS_TABLE - result = conn.execute( - select([logs.c.level, logs.c.message]).where( - logs.c.job_id == task_id - ) - ) - return Logs(result.fetchall()) - - def get_time_of_last_analyze(self): - # When ANALYZE runs it appears to take a moment for the - # pg_stat_user_tables to update, which we use to check analyze runs, - # so sadly we need a sleep :( - # DR: 0.25 is pretty reliable on my machine, but give a wide margin - time.sleep(1) - engine, conn = self.get_datastore_engine_and_connection() - result = conn.execute( - """ - SELECT last_analyze, last_autoanalyze - FROM pg_stat_user_tables - WHERE relname='{}'; - """.format( - self.resource_id - ) - ) - last_analyze_datetimes = result.fetchall()[0] - return max([x for x in last_analyze_datetimes if x] or [None]) - - @mock_actions - @responses.activate - def test_simple_csv(self): - # Test not only the load and xloader_hook is called at the end - self.register_urls(filename="simple.csv") - data = { - "api_key": self.api_key, - "job_type": "xloader_to_datastore", - "result_url": self.callback_url, - "metadata": { - "ckan_url": "http://%s/" % self.host, - "resource_id": self.resource_id, - }, - } - job_id = "test{}".format(random.randint(0, 1e5)) - - with mock.patch( - "ckanext.xloader.jobs.set_resource_metadata" - ) as mocked_set_resource_metadata: - # in tests we call jobs directly, rather than use rq, so mock - # get_current_job() - with mock.patch( - "ckanext.xloader.jobs.get_current_job", - return_value=mock.Mock(id=job_id), - ): 
- result = jobs.xloader_data_into_datastore(data) - assert result is None, jobs_db.get_job(job_id)["error"]["message"] - - # Check it said it was successful - assert ( - responses.calls[-1].request.url - == "http://www.ckan.org/api/3/action/xloader_hook" - ) - job_dict = json.loads(responses.calls[-1].request.body) - assert job_dict["status"] == u"complete", job_dict - assert job_dict == { - u"metadata": { - u"datastore_contains_all_records_of_source_file": True, - u"datastore_active": True, - u"ckan_url": u"http://www.ckan.org/", - u"resource_id": u"foo-bar-42", - }, - u"status": u"complete", - } - - # Check the load - data = self.get_datastore_table() - assert data["headers"] == [ - "_id", - "_full_text", - "date", - "temperature", - "place", - ] - assert data["header_dict"]["date"] == "TEXT" - # 'TIMESTAMP WITHOUT TIME ZONE') - assert data["header_dict"]["temperature"] == "TEXT" # 'NUMERIC') - assert data["header_dict"]["place"] == "TEXT" # 'TEXT') - assert data["num_rows"] == 6 - assert data["rows"][0][2:] == (u"2011-01-01", u"1", u"Galway") - # (datetime.datetime(2011, 1, 1), 1, 'Galway')) - - # Check it wanted to set the datastore_active=True - mocked_set_resource_metadata.assert_called_once() - assert mocked_set_resource_metadata.call_args[1]["update_dict"] == { - "datastore_contains_all_records_of_source_file": True, - "datastore_active": True, - "ckan_url": "http://www.ckan.org/", - "resource_id": "foo-bar-42", - } - - logs = self.get_load_logs(job_id) - logs.assert_no_errors() - - job = jobs_db.get_job(job_id) - assert job["status"] == u"complete" - assert job["error"] is None - - # Check ANALYZE was run - last_analyze = self.get_time_of_last_analyze() - assert last_analyze - - @mock_actions - @responses.activate - @mock.patch("ckanext.xloader.jobs.MAX_CONTENT_LENGTH", 10000) - @mock.patch("ckanext.xloader.jobs.MAX_EXCERPT_LINES", 100) - def test_too_large_csv(self): - - # Test not only the load and xloader_hook is called at the end - self.register_urls(filename="simple-large.csv") - data = { - "api_key": self.api_key, - "job_type": "xloader_to_datastore", - "result_url": self.callback_url, - "metadata": { - "ckan_url": "http://%s/" % self.host, - "resource_id": self.resource_id, - }, - } - job_id = "test{}".format(random.randint(0, 1e5)) - - with mock.patch( - "ckanext.xloader.jobs.set_resource_metadata" - ) as mocked_set_resource_metadata: - # in tests we call jobs directly, rather than use rq, so mock - # get_current_job() - with mock.patch( - "ckanext.xloader.jobs.get_current_job", - return_value=mock.Mock(id=job_id), - ): - result = jobs.xloader_data_into_datastore(data) - assert result is None, jobs_db.get_job(job_id)["error"]["message"] - - # Check it said it was successful - assert ( - responses.calls[-1].request.url - == "http://www.ckan.org/api/3/action/xloader_hook" - ) - job_dict = json.loads(responses.calls[-1].request.body) - assert job_dict["status"] == u"complete", job_dict - assert job_dict == { - u"metadata": { - u"datastore_contains_all_records_of_source_file": False, - u"datastore_active": True, - u"ckan_url": u"http://www.ckan.org/", - u"resource_id": u"foo-bar-42", - }, - u"status": u"complete", - } - - # Check the load - data = self.get_datastore_table() - assert data["headers"] == ["_id", "_full_text", "id", "text"] - assert data["header_dict"]["id"] == "TEXT" - # 'TIMESTAMP WITHOUT TIME ZONE') - assert data["header_dict"]["text"] == "TEXT" - assert data["num_rows"] <= 100 - assert data["num_rows"] > 0 - assert data["rows"][0][2:] == (u"1", u"a") - - # 
Check it wanted to set the datastore_active=True - mocked_set_resource_metadata.assert_called_once() - assert mocked_set_resource_metadata.call_args[1]["update_dict"] == { - "datastore_contains_all_records_of_source_file": False, - "datastore_active": True, - "ckan_url": "http://www.ckan.org/", - "resource_id": "foo-bar-42", - } - - logs = self.get_load_logs(job_id) - logs.assert_no_errors() - - job = jobs_db.get_job(job_id) - assert job["status"] == u"complete" - assert job["error"] is None - - # Check ANALYZE was run - last_analyze = self.get_time_of_last_analyze() - assert last_analyze - - @mock_actions - @responses.activate - @mock.patch("ckanext.xloader.jobs.MAX_CONTENT_LENGTH", 10000) - @mock.patch("ckanext.xloader.jobs.MAX_EXCERPT_LINES", 100) - def test_too_large_xls(self): - - # Test not only the load and xloader_hook is called at the end - self.register_urls(filename="simple-large.xls") - data = { - "api_key": self.api_key, - "job_type": "xloader_to_datastore", - "result_url": self.callback_url, - "metadata": { - "ckan_url": "http://%s/" % self.host, - "resource_id": self.resource_id, - }, - } - job_id = "test{}".format(random.randint(0, 1e5)) - - with mock.patch("ckanext.xloader.jobs.set_resource_metadata"): - # in tests we call jobs directly, rather than use rq, so mock - # get_current_job() - with mock.patch( - "ckanext.xloader.jobs.get_current_job", - return_value=mock.Mock(id=job_id), - ): - result = jobs.xloader_data_into_datastore(data) - assert result is not None, jobs_db.get_job(job_id)["error"]["message"] - - # Check it said it was successful - assert ( - responses.calls[-1].request.url - == "http://www.ckan.org/api/3/action/xloader_hook" - ) - job_dict = json.loads(responses.calls[-1].request.body) - assert job_dict["status"] == u"error", job_dict - assert job_dict == { - u"status": u"error", - u"metadata": { - u"ckan_url": u"http://www.ckan.org/", - u"datastore_contains_all_records_of_source_file": False, - u"resource_id": u"foo-bar-42", - }, - u"error": u"Loading file raised an error: array index out of range", - } - - job = jobs_db.get_job(job_id) - assert job["status"] == u"error" - assert job["error"] == { - u"message": u"Loading file raised an error: array index out of range" - } - - @mock_actions - @responses.activate - def test_tabulator(self): - # xloader's COPY can't handle xls, so it will be dealt with by - # tabulator - self.register_urls( - filename="simple.xls", content_type="application/vnd.ms-excel" - ) - data = { - "api_key": self.api_key, - "job_type": "xloader_to_datastore", - "result_url": self.callback_url, - "metadata": { - "ckan_url": "http://%s/" % self.host, - "resource_id": self.resource_id, - }, - } - job_id = "test{}".format(random.randint(0, 1e5)) - - with mock.patch( - "ckanext.xloader.jobs.set_resource_metadata" - ) as mocked_set_resource_metadata: - # in tests we call jobs directly, rather than use rq, so mock - # get_current_job() - with mock.patch( - "ckanext.xloader.jobs.get_current_job", - return_value=mock.Mock(id=job_id), - ): - result = jobs.xloader_data_into_datastore(data) - assert result is None - - # Check it said it was successful - assert ( - responses.calls[-1].request.url - == "http://www.ckan.org/api/3/action/xloader_hook" - ) - job_dict = json.loads(responses.calls[-1].request.body) - assert job_dict["status"] == u"complete", job_dict - assert job_dict == { - u"metadata": { - u"datastore_contains_all_records_of_source_file": True, - u"datastore_active": True, - u"ckan_url": u"http://www.ckan.org/", - u"resource_id": 
u"foo-bar-42", - }, - u"status": u"complete", - } - - # Check the load - data = self.get_datastore_table() - assert data["headers"] == [ - "_id", - "_full_text", - "date", - "temperature", - "place", - ] - assert data["header_dict"]["date"] == "TIMESTAMP WITHOUT TIME ZONE" - assert data["header_dict"]["temperature"] == "NUMERIC" - assert data["header_dict"]["place"] == "TEXT" - assert data["num_rows"] == 6 - assert data["rows"][0][2:] == ( - datetime.datetime(2011, 1, 1), - 1, - u"Galway", - ) - - # Check it wanted to set the datastore_active=True - mocked_set_resource_metadata.assert_called_once() - assert mocked_set_resource_metadata.call_args[1]["update_dict"] == { - "ckan_url": "http://www.ckan.org/", - "datastore_contains_all_records_of_source_file": True, - "datastore_active": True, - "resource_id": "foo-bar-42", - } - - # check logs have the error doing the COPY - logs = self.get_load_logs(job_id) - copy_error_index = None - for i, log in enumerate(logs): - if log[0] == "WARNING" and log[1].startswith( - "Load using COPY failed: Error during the load into PostgreSQL" - ): - copy_error_index = i - break - assert copy_error_index, "Missing COPY error" - - # check messytable portion of the logs - logs = Logs(logs[copy_error_index + 1:]) - assert logs[0] == (u"INFO", u"Trying again with tabulator") - logs.assert_no_errors() - - # Check ANALYZE was run - last_analyze = self.get_time_of_last_analyze() - assert last_analyze - - @mock_actions - @responses.activate - def test_umlaut_and_extra_comma(self): - self.register_urls(filename="umlaut_and_extra_comma.csv") - # This csv has an extra comma which causes the COPY to throw a - # psycopg2.DataError and the umlaut can cause problems for logging the - # error. We need to check that it correctly reverts to using - # tabulator to load it - data = { - "api_key": self.api_key, - "job_type": "xloader_to_datastore", - "result_url": self.callback_url, - "metadata": { - "ckan_url": "http://%s/" % self.host, - "resource_id": self.resource_id, - }, - } - job_id = "test{}".format(random.randint(0, 1e5)) - - with mock.patch("ckanext.xloader.jobs.set_resource_metadata"): - # in tests we call jobs directly, rather than use rq, so mock - # get_current_job() - with mock.patch( - "ckanext.xloader.jobs.get_current_job", - return_value=mock.Mock(id=job_id), - ): - result = jobs.xloader_data_into_datastore(data) - assert result is None, jobs_db.get_job(job_id)["error"]["message"] - - # Check it said it was successful - assert ( - responses.calls[-1].request.url - == "http://www.ckan.org/api/3/action/xloader_hook" - ) - job_dict = json.loads(responses.calls[-1].request.body) - assert job_dict["status"] == u"complete", job_dict - assert job_dict == { - u"metadata": { - u"datastore_contains_all_records_of_source_file": True, - u"datastore_active": True, - u"ckan_url": u"http://www.ckan.org/", - u"resource_id": u"foo-bar-42", - }, - u"status": u"complete", - } - - logs = self.get_load_logs(job_id) - logs.assert_no_errors() - - job = jobs_db.get_job(job_id) - assert job["status"] == u"complete" - assert job["error"] is None - - @mock_actions - @responses.activate - def test_invalid_byte_sequence(self): - self.register_urls(filename='go-realtime.xlsx') - # This xlsx throws an Postgres error on INSERT because of - # 'invalid byte sequence for encoding "UTF8": 0x00' which causes - # the COPY to throw a psycopg2.DataError and umlauts in the file can - # cause problems for logging the error. 
We need to check that - # it correctly reverts to using tabulator to load it - data = { - 'api_key': self.api_key, - 'job_type': 'xloader_to_datastore', - 'result_url': self.callback_url, - 'metadata': { - 'ckan_url': 'http://%s/' % self.host, - 'resource_id': self.resource_id - } - } - job_id = "test{}".format(random.randint(0, 1e5)) - - with mock.patch('ckanext.xloader.jobs.set_datastore_active_flag'): - # in tests we call jobs directly, rather than use rq, so mock - # get_current_job() - with mock.patch( - "ckanext.xloader.jobs.get_current_job", - return_value=mock.Mock(id=job_id), - ): - result = jobs.xloader_data_into_datastore(data) - assert result is None, jobs_db.get_job(job_id)["error"]["message"] - - # Check it said it was successful - assert responses.calls[-1].request.url == \ - 'http://www.ckan.org/api/3/action/xloader_hook' - job_dict = json.loads(responses.calls[-1].request.body) - assert job_dict['status'] == u'complete', job_dict - assert job_dict == \ - {u'metadata': {u'ckan_url': u'http://www.ckan.org/', - u'resource_id': u'foo-bar-42'}, - u'status': u'complete'} - - logs = self.get_load_logs(job_id) - logs.assert_no_errors() - - job = jobs_db.get_job(job_id) - assert job['status'] == u'complete' - assert job['error'] is None - - @mock_actions - @responses.activate - def test_first_request_is_202_pending_response(self): - # when you first get the CSV it returns this 202 response, which is - # what this server does: https://data-cdfw.opendata.arcgis.com/datasets - responses.add( - responses.GET, - SOURCE_URL, - status=202, - body='{"processingTime":"8.716 seconds","status":"Processing","generating":{}}', - content_type="application/json", - ) - # subsequent GETs of the CSV work fine - self.register_urls() - data = { - "api_key": self.api_key, - "job_type": "xloader_to_datastore", - "result_url": self.callback_url, - "metadata": { - "ckan_url": "http://%s/" % self.host, - "resource_id": self.resource_id, - }, - } - job_id = "test{}".format(random.randint(0, 1e5)) - - with mock.patch( - "ckanext.xloader.jobs.set_resource_metadata" - ) as mocked_set_resource_metadata: - # in tests we call jobs directly, rather than use rq, so mock - # get_current_job() - with mock.patch( - "ckanext.xloader.jobs.get_current_job", - return_value=mock.Mock(id=job_id), - ): - result = jobs.xloader_data_into_datastore(data) - assert result is None, jobs_db.get_job(job_id)["error"]["message"] - - # Check it said it was successful - assert ( - responses.calls[-1].request.url - == "http://www.ckan.org/api/3/action/xloader_hook" - ) - job_dict = json.loads(responses.calls[-1].request.body) - assert job_dict["status"] == u"complete", job_dict - assert job_dict == { - u"metadata": { - u"ckan_url": u"http://www.ckan.org/", - u"datastore_contains_all_records_of_source_file": True, - u"datastore_active": True, - u"resource_id": u"foo-bar-42", - }, - u"status": u"complete", - } - - # Check the load - data = self.get_datastore_table() - assert data["headers"] == [ - "_id", - "_full_text", - "date", - "temperature", - "place", - ] - assert data["header_dict"]["date"] == "TEXT" - # 'TIMESTAMP WITHOUT TIME ZONE') - assert data["header_dict"]["temperature"] == "TEXT" # 'NUMERIC') - assert data["header_dict"]["place"] == "TEXT" # 'TEXT') - assert data["num_rows"] == 6 - assert data["rows"][0][2:] == (u"2011-01-01", u"1", u"Galway") - # (datetime.datetime(2011, 1, 1), 1, 'Galway')) - - # Check it wanted to set the datastore_active=True - mocked_set_resource_metadata.assert_called_once() - assert 
mocked_set_resource_metadata.call_args[1]["update_dict"] == { - "datastore_contains_all_records_of_source_file": True, - "datastore_active": True, - "ckan_url": "http://www.ckan.org/", - "resource_id": "foo-bar-42", - } - - logs = self.get_load_logs(job_id) - logs.assert_no_errors() - - job = jobs_db.get_job(job_id) - assert job["status"] == u"complete" - assert job["error"] is None - - -class Logs(list): - def get_errors(self): - return [message for level, message in self if level == "ERROR"] - - def grep(self, text): - return [message for level, message in self if text in message] - - def assert_no_errors(self): - errors = self.get_errors() - assert not errors, errors - - -def get_sample_file(filename): - filepath = os.path.join(os.path.dirname(__file__), "samples", filename) - return open(filepath).read() - +from unittest import mock +from ckanext.xloader import jobs +from ckanext.xloader.utils import get_xloader_user_apitoken + + +_TEST_FILE_CONTENT = "x, y\n1,2\n2,4\n3,6\n4,8\n5,10" + + +def get_response(download_url, headers): + """Mock jobs.get_response() method.""" + resp = Response() + resp.raw = io.BytesIO(_TEST_FILE_CONTENT.encode()) + resp.headers = headers + return resp + + +def get_large_response(download_url, headers): + """Mock jobs.get_response() method to fake a large file.""" + resp = Response() + resp.raw = io.BytesIO(_TEST_FILE_CONTENT.encode()) + resp.headers = {'content-length': 2000000000} + return resp + + +@pytest.fixture +def apikey(): + if toolkit.check_ckan_version(min_version="2.10"): + sysadmin = factories.SysadminWithToken() + else: + # To provide support with CKAN 2.9 + sysadmin = factories.Sysadmin() + sysadmin["token"] = get_xloader_user_apitoken() + + return sysadmin["token"] + + +@pytest.fixture +def data(create_with_upload, apikey): + dataset = factories.Dataset() + resource = create_with_upload( + _TEST_FILE_CONTENT, + "multiplication_2.csv", + url="http://data", + package_id=dataset["id"] + ) + callback_url = toolkit.url_for( + "api.action", ver=3, logic_function="xloader_hook", qualified=True + ) + return { + 'api_key': apikey, + 'job_type': 'xloader_to_datastore', + 'result_url': callback_url, + 'metadata': { + 'ignore_hash': True, + 'ckan_url': toolkit.config.get('ckan.site_url'), + 'resource_id': resource["id"], + 'set_url_type': False, + 'task_created': datetime.utcnow().isoformat(), + 'original_url': resource["url"], + } + } + + +@pytest.mark.usefixtures("clean_db", "with_plugins") +class TestXLoaderJobs(helpers.FunctionalRQTestBase): + + def test_xloader_data_into_datastore(self, cli, data): + self.enqueue(jobs.xloader_data_into_datastore, [data]) + with mock.patch("ckanext.xloader.jobs.get_response", get_response): + stdout = cli.invoke(ckan, ["jobs", "worker", "--burst"]).output + assert "File hash: d44fa65eda3675e11710682fdb5f1648" in stdout + assert "Fields: [{'id': 'x', 'type': 'text'}, {'id': 'y', 'type': 'text'}]" in stdout + assert "Copying to database..." in stdout + assert "Creating search index..." 
in stdout + assert "Express Load completed" in stdout + + resource = helpers.call_action("resource_show", id=data["metadata"]["resource_id"]) + assert resource["datastore_contains_all_records_of_source_file"] + + def test_xloader_ignore_hash(self, cli, data): + self.enqueue(jobs.xloader_data_into_datastore, [data]) + with mock.patch("ckanext.xloader.jobs.get_response", get_response): + stdout = cli.invoke(ckan, ["jobs", "worker", "--burst"]).output + assert "Express Load completed" in stdout + + self.enqueue(jobs.xloader_data_into_datastore, [data]) + with mock.patch("ckanext.xloader.jobs.get_response", get_response): + stdout = cli.invoke(ckan, ["jobs", "worker", "--burst"]).output + assert "Copying to database..." in stdout + assert "Express Load completed" in stdout + + data["metadata"]["ignore_hash"] = False + self.enqueue(jobs.xloader_data_into_datastore, [data]) + with mock.patch("ckanext.xloader.jobs.get_response", get_response): + stdout = cli.invoke(ckan, ["jobs", "worker", "--burst"]).output + assert "Ignoring resource - the file hash hasn't changed" in stdout + + def test_data_too_big_error_if_content_length_bigger_than_config(self, cli, data): + self.enqueue(jobs.xloader_data_into_datastore, [data]) + with mock.patch("ckanext.xloader.jobs.get_response", get_large_response): + stdout = cli.invoke(ckan, ["jobs", "worker", "--burst"]).output + assert "Data too large to load into Datastore:" in stdout + + def test_data_max_excerpt_lines_config(self, cli, data): + self.enqueue(jobs.xloader_data_into_datastore, [data]) + with mock.patch("ckanext.xloader.jobs.get_response", get_large_response): + with mock.patch("ckanext.xloader.jobs.MAX_EXCERPT_LINES", 1): + stdout = cli.invoke(ckan, ["jobs", "worker", "--burst"]).output + assert "Loading excerpt of ~1 lines to DataStore." 
in stdout + + resource = helpers.call_action("resource_show", id=data["metadata"]["resource_id"]) + assert resource["datastore_contains_all_records_of_source_file"] is False + + +@pytest.mark.usefixtures("clean_db") class TestSetResourceMetadata(object): - @classmethod - def setup_class(cls): - helpers.reset_db() - def test_simple(self): resource = factories.Resource() @@ -748,15 +139,6 @@ def test_simple(self): ) resource = helpers.call_action("resource_show", id=resource["id"]) - from pprint import pprint - - pprint(resource) - assert resource["datastore_contains_all_records_of_source_file"] in ( - True, - u"True", - ) - # I'm not quite sure why this is a string on travis - I get the bool - # locally - + assert resource["datastore_contains_all_records_of_source_file"] assert resource["datastore_active"] assert resource["ckan_url"] == "http://www.ckan.org/" diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py index f31b663b..e8816a13 100644 --- a/ckanext/xloader/tests/test_loader.py +++ b/ckanext/xloader/tests/test_loader.py @@ -64,8 +64,12 @@ def _get_column_names(self, Session, table_name): # SELECT column_name FROM information_schema.columns WHERE table_name='test1'; c = Session.connection() sql = ( - "SELECT column_name FROM information_schema.columns " - "WHERE table_name='{}';".format(table_name) + """ + SELECT column_name + FROM information_schema.columns + WHERE table_name='{}' + ORDER BY ordinal_position; + """.format(table_name) ) results = c.execute(sql) records = results.fetchall() @@ -74,8 +78,12 @@ def _get_column_names(self, Session, table_name): def _get_column_types(self, Session, table_name): c = Session.connection() sql = ( - "SELECT udt_name FROM information_schema.columns " - "WHERE table_name='{}';".format(table_name) + """ + SELECT udt_name + FROM information_schema.columns + WHERE table_name='{}' + ORDER BY ordinal_position; + """.format(table_name) ) results = c.execute(sql) records = results.fetchall() @@ -85,8 +93,8 @@ def _get_column_types(self, Session, table_name): class TestLoadCsv(TestLoadBase): def test_simple(self, Session): csv_filepath = get_sample_filepath("simple.csv") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] loader.load_csv( csv_filepath, resource_id=resource_id, @@ -95,7 +103,7 @@ def test_simple(self, Session): ) assert self._get_records( - Session, "test1", limit=1, exclude_full_text_column=False + Session, resource_id, limit=1, exclude_full_text_column=False ) == [ ( 1, @@ -105,7 +113,7 @@ def test_simple(self, Session): u"Galway", ) ] - assert self._get_records(Session, "test1") == [ + assert self._get_records(Session, resource_id) == [ (1, u"2011-01-01", u"1", u"Galway"), (2, u"2011-01-02", u"-1", u"Galway"), (3, u"2011-01-03", u"0", u"Galway"), @@ -113,14 +121,14 @@ def test_simple(self, Session): (5, None, None, u"Berkeley"), (6, u"2011-01-03", u"5", None), ] - assert self._get_column_names(Session, "test1") == [ + assert self._get_column_names(Session, resource_id) == [ u"_id", u"_full_text", u"date", u"temperature", u"place", ] - assert self._get_column_types(Session, "test1") == [ + assert self._get_column_types(Session, resource_id) == [ u"int4", u"tsvector", u"text", @@ -130,8 +138,8 @@ def test_simple(self, Session): def test_simple_with_indexing(self, Session): csv_filepath = get_sample_filepath("simple.csv") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id 
= resource['id'] fields = loader.load_csv( csv_filepath, resource_id=resource_id, @@ -144,7 +152,7 @@ def test_simple_with_indexing(self, Session): assert ( self._get_records( - Session, "test1", limit=1, exclude_full_text_column=False + Session, resource_id, limit=1, exclude_full_text_column=False )[0][1] == "'-01':2,3 '1':4 '2011':1 'galway':5" ) @@ -155,8 +163,8 @@ def test_boston_311_complete(self): # to get the test file: # curl -o ckanext/xloader/tests/samples/boston_311.csv https://data.boston.gov/dataset/8048697b-ad64-4bfc-b090-ee00169f2323/resource/2968e2c0-d479-49ba-a884-4ef523ada3c0/download/311.csv # noqa csv_filepath = get_sample_filepath("boston_311.csv") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] import time t0 = time.time() @@ -179,8 +187,8 @@ def test_boston_311_sample5(self): # to create the test file: # head -n 100001 ckanext/xloader/tests/samples/boston_311.csv > ckanext/xloader/tests/samples/boston_311_sample5.csv csv_filepath = get_sample_filepath("boston_311_sample5.csv") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] import time t0 = time.time() @@ -199,8 +207,8 @@ def test_boston_311_sample5(self): def test_boston_311(self, Session): csv_filepath = get_sample_filepath("boston_311_sample.csv") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] loader.load_csv( csv_filepath, resource_id=resource_id, @@ -208,7 +216,7 @@ def test_boston_311(self, Session): logger=logger, ) - records = self._get_records(Session, "test1") + records = self._get_records(Session, resource_id) print(records) assert records == [ ( @@ -308,8 +316,8 @@ def test_boston_311(self, Session): u"Citizens Connect App", ), ] # noqa - print(self._get_column_names(Session, "test1")) - assert self._get_column_names(Session, "test1") == [ + print(self._get_column_names(Session, resource_id)) + assert self._get_column_names(Session, resource_id) == [ u"_id", u"_full_text", u"CASE_ENQUIRY_ID", @@ -342,16 +350,16 @@ def test_boston_311(self, Session): u"Longitude", u"Source", ] # noqa - print(self._get_column_types(Session, "test1")) - assert self._get_column_types(Session, "test1") == [ + print(self._get_column_types(Session, resource_id)) + assert self._get_column_types(Session, resource_id) == [ u"int4", u"tsvector", ] + [u"text"] * (len(records[0]) - 1) def test_brazilian(self, Session): csv_filepath = get_sample_filepath("brazilian_sample.csv") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] loader.load_csv( csv_filepath, resource_id=resource_id, @@ -359,7 +367,7 @@ def test_brazilian(self, Session): logger=logger, ) - records = self._get_records(Session, "test1") + records = self._get_records(Session, resource_id) print(records) assert records[0] == ( 1, @@ -459,8 +467,8 @@ def test_brazilian(self, Session): None, None, ) # noqa - print(self._get_column_names(Session, "test1")) - assert self._get_column_names(Session, "test1") == [ + print(self._get_column_names(Session, resource_id)) + assert self._get_column_names(Session, resource_id) == [ u"_id", u"_full_text", u"NU_ANO_CENSO", @@ -559,16 +567,16 @@ def test_brazilian(self, Session): u"PROVA_MEAN_MAT_I_MUN", u"PROVA_MEAN_MAT_T_MUN", ] # noqa - print(self._get_column_types(Session, "test1")) - assert self._get_column_types(Session, "test1") 
== [ + print(self._get_column_types(Session, resource_id)) + assert self._get_column_types(Session, resource_id) == [ u"int4", u"tsvector", ] + [u"text"] * (len(records[0]) - 1) def test_german(self, Session): csv_filepath = get_sample_filepath("german_sample.csv") - resource_id = "test_german" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] loader.load_csv( csv_filepath, resource_id=resource_id, @@ -576,7 +584,7 @@ def test_german(self, Session): logger=logger, ) - records = self._get_records(Session, "test_german") + records = self._get_records(Session, resource_id) print(records) assert records[0] == ( 1, @@ -591,8 +599,8 @@ def test_german(self, Session): u"24221", u"672", ) - print(self._get_column_names(Session, "test_german")) - assert self._get_column_names(Session, "test_german") == [ + print(self._get_column_names(Session, resource_id)) + assert self._get_column_names(Session, resource_id) == [ u"_id", u"_full_text", u"Stadtname", @@ -606,16 +614,76 @@ def test_german(self, Session): u"Schuler_Berufsausbildung_2010/2011", u"Schuler_andere allgemeinbildende Schulen_2010/2011", ] - print(self._get_column_types(Session, "test_german")) - assert self._get_column_types(Session, "test_german") == [ + print(self._get_column_types(Session, resource_id)) + assert self._get_column_types(Session, resource_id) == [ u"int4", u"tsvector", ] + [u"text"] * (len(records[0]) - 1) + def test_with_blanks(self, Session): + csv_filepath = get_sample_filepath("sample_with_blanks.csv") + resource = factories.Resource() + resource_id = resource['id'] + loader.load_csv( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert len(self._get_records(Session, resource_id)) == 3 + + def test_with_empty_lines(self, Session): + csv_filepath = get_sample_filepath("sample_with_empty_lines.csv") + resource = factories.Resource() + resource_id = resource['id'] + loader.load_csv( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert len(self._get_records(Session, resource_id)) == 6 + + def test_with_quoted_commas(self, Session): + csv_filepath = get_sample_filepath("sample_with_quoted_commas.csv") + resource = factories.Resource() + resource_id = resource['id'] + loader.load_csv( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert len(self._get_records(Session, resource_id)) == 3 + + def test_with_mixed_quotes(self, Session): + csv_filepath = get_sample_filepath("sample_with_mixed_quotes.csv") + resource = factories.Resource() + resource_id = resource['id'] + loader.load_csv( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert len(self._get_records(Session, resource_id)) == 2 + + def test_with_mixed_types(self, Session): + csv_filepath = get_sample_filepath("mixed_numeric_string_sample.csv") + resource = factories.Resource() + resource_id = resource['id'] + loader.load_csv( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert len(self._get_records(Session, resource_id)) == 6 + def test_reload(self, Session): csv_filepath = get_sample_filepath("simple.csv") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] loader.load_csv( csv_filepath, resource_id=resource_id, @@ -631,15 +699,15 @@ def test_reload(self, Session): logger=logger, ) - assert len(self._get_records(Session, "test1")) 
== 6 - assert self._get_column_names(Session, "test1") == [ + assert len(self._get_records(Session, resource_id)) == 6 + assert self._get_column_names(Session, resource_id) == [ u"_id", u"_full_text", u"date", u"temperature", u"place", ] - assert self._get_column_types(Session, "test1") == [ + assert self._get_column_types(Session, resource_id) == [ u"int4", u"tsvector", u"text", @@ -653,8 +721,8 @@ def test_reload(self, Session): ) def test_reload_with_overridden_types(self, Session): csv_filepath = get_sample_filepath("simple.csv") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] loader.load_csv( csv_filepath, resource_id=resource_id, @@ -684,15 +752,15 @@ def test_reload_with_overridden_types(self, Session): fields=fields, resource_id=resource_id, logger=logger ) - assert len(self._get_records(Session, "test1")) == 6 - assert self._get_column_names(Session, "test1") == [ + assert len(self._get_records(Session, resource_id)) == 6 + assert self._get_column_names(Session, resource_id) == [ u"_id", u"_full_text", u"date", u"temperature", u"place", ] - assert self._get_column_types(Session, "test1") == [ + assert self._get_column_types(Session, resource_id) == [ u"int4", u"tsvector", u"timestamp", @@ -702,7 +770,7 @@ def test_reload_with_overridden_types(self, Session): # check that rows with nulls are indexed correctly records = self._get_records( - Session, "test1", exclude_full_text_column=False + Session, resource_id, exclude_full_text_column=False ) print(records) assert records[4][1] == "'berkeley':1" @@ -727,8 +795,8 @@ def test_encode_headers(self): def test_column_names(self, Session): csv_filepath = get_sample_filepath("column_names.csv") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] loader.load_csv( csv_filepath, resource_id=resource_id, @@ -736,12 +804,12 @@ def test_column_names(self, Session): logger=logger, ) - assert self._get_column_names(Session, "test1")[2:] == [ + assert self._get_column_names(Session, resource_id)[2:] == [ u"d@t$e", u"t^e&m*pe!r(a)t?u:r%%e", r"p\l/a[c{e%", ] - assert self._get_records(Session, "test1")[0] == ( + assert self._get_records(Session, resource_id)[0] == ( 1, u"2011-01-01", u"1", @@ -752,8 +820,8 @@ def test_column_names(self, Session): class TestLoadUnhandledTypes(TestLoadBase): def test_kml(self): filepath = get_sample_filepath("polling_locations.kml") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] with pytest.raises(LoaderError) as exception: loader.load_csv( filepath, @@ -769,8 +837,8 @@ def test_kml(self): def test_geojson(self): filepath = get_sample_filepath("polling_locations.geojson") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] with pytest.raises(LoaderError) as exception: loader.load_csv( filepath, @@ -791,8 +859,8 @@ def test_geojson(self): ) def test_shapefile_zip_python2(self): filepath = get_sample_filepath("polling_locations.shapefile.zip") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] with pytest.raises(LoaderError): loader.load_csv( filepath, @@ -811,8 +879,8 @@ def test_shapefile_zip_python3(self, Session): # finds, 'Polling_Locations.cpg'. This file only contains the # following data: `UTF-8`. 
filepath = get_sample_filepath("polling_locations.shapefile.zip") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] loader.load_csv( filepath, resource_id=resource_id, @@ -820,8 +888,8 @@ def test_shapefile_zip_python3(self, Session): logger=logger, ) - assert self._get_records(Session, "test1") == [] - assert self._get_column_names(Session, "test1") == [ + assert self._get_records(Session, resource_id) == [] + assert self._get_column_names(Session, resource_id) == [ '_id', '_full_text', 'UTF-8' @@ -831,8 +899,8 @@ def test_shapefile_zip_python3(self, Session): class TestLoadTabulator(TestLoadBase): def test_simple(self, Session): csv_filepath = get_sample_filepath("simple.xls") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] loader.load_table( csv_filepath, resource_id=resource_id, @@ -843,7 +911,7 @@ def test_simple(self, Session): assert ( "'galway':" in self._get_records( - Session, "test1", limit=1, exclude_full_text_column=False + Session, resource_id, limit=1, exclude_full_text_column=False )[0][1] ) # Indexed record looks like this (depending on CKAN version?): @@ -851,7 +919,7 @@ def test_simple(self, Session): # "'-01':4,5 '00':6,7,8 '1':1 '2011':3 'galway':2" # "'-01':2,3 '00':5,6 '1':7 '2011':1 'galway':8 't00':4" - assert self._get_records(Session, "test1") == [ + assert self._get_records(Session, resource_id) == [ (1, datetime.datetime(2011, 1, 1, 0, 0), Decimal("1"), u"Galway",), ( 2, @@ -879,14 +947,14 @@ def test_simple(self, Session): u"Berkeley", ), ] - assert self._get_column_names(Session, "test1") == [ + assert self._get_column_names(Session, resource_id) == [ u"_id", u"_full_text", u"date", u"temperature", u"place", ] - assert self._get_column_types(Session, "test1") == [ + assert self._get_column_types(Session, resource_id) == [ u"int4", u"tsvector", u"timestamp", @@ -894,14 +962,52 @@ def test_simple(self, Session): u"text", ] + def test_simple_large_file(self, Session): + csv_filepath = get_sample_filepath("simple-large.csv") + resource = factories.Resource() + resource_id = resource['id'] + loader.load_table( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert self._get_column_types(Session, resource_id) == [ + u"int4", + u"tsvector", + u"numeric", + u"text", + ] + + def test_with_mixed_types(self, Session): + csv_filepath = get_sample_filepath("mixed_numeric_string_sample.csv") + resource = factories.Resource() + resource_id = resource['id'] + loader.load_table( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert len(self._get_records(Session, resource_id)) == 6 + + assert self._get_column_types(Session, resource_id) == [ + u'int4', + u'tsvector', + u'text', + u'text', + u'text', + u'numeric' + ] + # test disabled by default to avoid adding large file to repo and slow test @pytest.mark.skip def test_boston_311_complete(self): # to get the test file: # curl -o ckanext/xloader/tests/samples/boston_311.csv https://data.boston.gov/dataset/8048697b-ad64-4bfc-b090-ee00169f2323/resource/2968e2c0-d479-49ba-a884-4ef523ada3c0/download/311.csv # noqa csv_filepath = get_sample_filepath("boston_311.csv") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] import time t0 = time.time() @@ -924,8 +1030,8 @@ def test_boston_311_sample5(self): # to create the test file: 
# head -n 100001 ckanext/xloader/tests/samples/boston_311.csv > ckanext/xloader/tests/samples/boston_311_sample5.csv csv_filepath = get_sample_filepath("boston_311_sample5.csv") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] import time t0 = time.time() @@ -944,8 +1050,8 @@ def test_boston_311_sample5(self): def test_boston_311(self, Session): csv_filepath = get_sample_filepath("boston_311_sample.csv") - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] loader.load_table( csv_filepath, resource_id=resource_id, @@ -953,7 +1059,7 @@ def test_boston_311(self, Session): logger=logger, ) - records = self._get_records(Session, "test1") + records = self._get_records(Session, resource_id) print(records) assert records == [ ( @@ -1053,8 +1159,8 @@ def test_boston_311(self, Session): u"Citizens Connect App", ), ] # noqa - print(self._get_column_names(Session, "test1")) - assert self._get_column_names(Session, "test1") == [ + print(self._get_column_names(Session, resource_id)) + assert self._get_column_names(Session, resource_id) == [ u"_id", u"_full_text", u"CASE_ENQUIRY_ID", @@ -1087,8 +1193,8 @@ def test_boston_311(self, Session): u"Longitude", u"Source", ] # noqa - print(self._get_column_types(Session, "test1")) - assert self._get_column_types(Session, "test1") == [ + print(self._get_column_types(Session, resource_id)) + assert self._get_column_types(Session, resource_id) == [ u"int4", u"tsvector", u"numeric", @@ -1126,8 +1232,8 @@ def test_no_entries(self): csv_filepath = get_sample_filepath("no_entries.csv") # no datastore table is created - we need to except, or else # datastore_active will be set on a non-existent datastore table - resource_id = "test1" - factories.Resource(id=resource_id) + resource = factories.Resource() + resource_id = resource['id'] with pytest.raises(LoaderError): loader.load_table( csv_filepath, @@ -1135,3 +1241,60 @@ def test_no_entries(self): mimetype="csv", logger=logger, ) + + def test_with_quoted_commas(self, Session): + csv_filepath = get_sample_filepath("sample_with_quoted_commas.csv") + resource = factories.Resource() + resource_id = resource['id'] + loader.load_table( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert len(self._get_records(Session, resource_id)) == 3 + + def test_with_iso_8859_1(self, Session): + csv_filepath = get_sample_filepath("non_utf8_sample.csv") + resource = factories.Resource() + resource_id = resource['id'] + loader.load_table( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert len(self._get_records(Session, resource_id)) == 266 + + def test_with_mixed_quotes(self, Session): + csv_filepath = get_sample_filepath("sample_with_mixed_quotes.csv") + resource = factories.Resource() + resource_id = resource['id'] + loader.load_table( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert len(self._get_records(Session, resource_id)) == 2 + + def test_preserving_time_ranges(self, Session): + """ Time ranges should not be treated as timestamps + """ + csv_filepath = get_sample_filepath("non_timestamp_sample.csv") + resource = factories.Resource() + resource_id = resource['id'] + loader.load_table( + csv_filepath, + resource_id=resource_id, + mimetype="text/csv", + logger=logger, + ) + assert self._get_records(Session, resource_id) == [ + (1, "Adavale", 4474, 
Decimal("-25.9092582"), Decimal("144.5975769"), + "8:00", "16:00", datetime.datetime(2018, 7, 19)), + (2, "Aramac", 4726, Decimal("-22.971298"), Decimal("145.241481"), + "9:00-13:00", "14:00-16:45", datetime.datetime(2018, 7, 17)), + (3, "Barcaldine", 4725, Decimal("-23.55327901"), Decimal("145.289156"), + "9:00-12:30", "13:30-16:30", datetime.datetime(2018, 7, 20)) + ] diff --git a/ckanext/xloader/tests/test_parser.py b/ckanext/xloader/tests/test_parser.py index 67929d9f..ac4047dd 100644 --- a/ckanext/xloader/tests/test_parser.py +++ b/ckanext/xloader/tests/test_parser.py @@ -6,7 +6,7 @@ from datetime import datetime from tabulator import Stream -from ckanext.xloader.parser import XloaderCSVParser +from ckanext.xloader.parser import TypeConverter csv_filepath = os.path.abspath( os.path.join(os.path.dirname(__file__), "samples", "date_formats.csv") @@ -16,7 +16,7 @@ class TestParser(object): def test_simple(self): with Stream(csv_filepath, format='csv', - custom_parsers={'csv': XloaderCSVParser}) as stream: + post_parse=[TypeConverter().convert_types]) as stream: assert stream.sample == [ [ 'date', @@ -49,7 +49,7 @@ def test_simple(self): def test_dayfirst(self): print('test_dayfirst') with Stream(csv_filepath, format='csv', - custom_parsers={'csv': XloaderCSVParser}) as stream: + post_parse=[TypeConverter().convert_types]) as stream: assert stream.sample == [ [ 'date', @@ -82,7 +82,7 @@ def test_dayfirst(self): def test_yearfirst(self): print('test_yearfirst') with Stream(csv_filepath, format='csv', - custom_parsers={'csv': XloaderCSVParser}) as stream: + post_parse=[TypeConverter().convert_types]) as stream: assert stream.sample == [ [ 'date', @@ -115,7 +115,7 @@ def test_yearfirst(self): @pytest.mark.ckan_config("ckanext.xloader.parse_dates_yearfirst", True) def test_yearfirst_dayfirst(self): with Stream(csv_filepath, format='csv', - custom_parsers={'csv': XloaderCSVParser}) as stream: + post_parse=[TypeConverter().convert_types]) as stream: assert stream.sample == [ [ 'date', diff --git a/ckanext/xloader/tests/test_plugin.py b/ckanext/xloader/tests/test_plugin.py index 45e166c3..cc8a29ca 100644 --- a/ckanext/xloader/tests/test_plugin.py +++ b/ckanext/xloader/tests/test_plugin.py @@ -7,8 +7,10 @@ except ImportError: import mock from six import text_type as str + from ckan.tests import helpers, factories from ckan.logic import _actions +from ckanext.xloader.plugin import _should_remove_unsupported_resource_from_datastore from ckanext.xloader.plugin import XLoaderFormats, DEFAULT_FORMATS @@ -60,6 +62,39 @@ def test_submit_when_url_changes(self, monkeypatch): assert func.called + @pytest.mark.parametrize("toolkit_config_value, mock_xloader_formats, url_type, datastore_active, expected_result", [ + # Test1: Should pass as it is an upload with an active datastore entry but an unsupported format + (True, False, 'upload', True, True), + # Test2: Should fail as it is a supported XLoader format. + (True, True, 'upload', True, False), + # Test3: Should fail as the config option is turned off. + (False, False, 'upload', True, False), + # Test4: Should fail as the url_type is not supported. + (True, False, 'custom_type', True, False), + # Test5: Should fail as datastore is inactive. 
+
+            (True, False, 'upload', False, False),
+            # Test6: Should pass as an empty url_type counts as a recognised resource type with an active datastore entry but an unsupported format
+            (True, False, '', True, True),
+            # Test7: Should pass as a missing url_type counts as a recognised resource type with an active datastore entry but an unsupported format
+            (True, False, None, True, True),
+    ])
+    def test_should_remove_unsupported_resource_from_datastore(
+            self, toolkit_config_value, mock_xloader_formats, url_type, datastore_active, expected_result):
+
+        # Set up mock data
+        res_dict = {
+            'format': 'some_format',
+            'url_type': url_type,
+            'datastore_active': datastore_active,
+            'extras': {'datastore_active': datastore_active}
+        }
+
+        # Assert the result based on the logic paths covered
+        with helpers.changed_config('ckanext.xloader.clean_datastore_tables', toolkit_config_value):
+            with mock.patch('ckanext.xloader.utils.XLoaderFormats.is_it_an_xloader_format') as mock_is_xloader_format:
+                mock_is_xloader_format.return_value = mock_xloader_formats
+                assert _should_remove_unsupported_resource_from_datastore(res_dict) == expected_result
+
     def _pending_task(self, resource_id):
         return {
             "entity_id": resource_id,
diff --git a/ckanext/xloader/utils.py b/ckanext/xloader/utils.py
index 1ce60d3a..539f7535 100644
--- a/ckanext/xloader/utils.py
+++ b/ckanext/xloader/utils.py
@@ -3,17 +3,50 @@
 import json
 import datetime
 
+from six import text_type as str, binary_type
+
 from ckan import model
 from ckan.lib import search
 from collections import defaultdict
 from decimal import Decimal
 
-from six import text_type as str
-
 import ckan.plugins as p
-
-
-def resource_data(id, resource_id):
+from ckan.plugins.toolkit import config
+
+from .job_exceptions import JobError
+
+# resource.formats accepted by ckanext-xloader. Must be lowercase here.
+DEFAULT_FORMATS = [
+    "csv",
+    "application/csv",
+    "xls",
+    "xlsx",
+    "tsv",
+    "application/vnd.ms-excel",
+    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+    "ods",
+    "application/vnd.oasis.opendocument.spreadsheet",
+]
+
+
+class XLoaderFormats(object):
+    formats = None
+
+    @classmethod
+    def is_it_an_xloader_format(cls, format_):
+        if cls.formats is None:
+            cls.formats = config.get("ckanext.xloader.formats")
+            if cls.formats is not None:
+                # use config value. preserves empty list as well.
+                cls.formats = cls.formats.lower().split()
+            else:
+                cls.formats = DEFAULT_FORMATS
+        if not format_:
+            return False
+        return format_.lower() in cls.formats
+
+
+def resource_data(id, resource_id, rows=None):
 
     if p.toolkit.request.method == "POST":
 
         try:
@@ -70,9 +103,12 @@ def unsupported_format(id, resource_id):
-    return p.toolkit.render(
-        "xloader/resource_data.html",
-        extra_vars={
-            "resource": p.toolkit.c.resource,
-            "pkg_dict": p.toolkit.c.pkg_dict,
-        },
-    )
+    extra_vars = {
+        "resource": p.toolkit.c.resource,
+        "pkg_dict": p.toolkit.c.pkg_dict,
+    }
+    if rows:
+        extra_vars["rows"] = rows
+    return p.toolkit.render(
+        "xloader/resource_data.html",
+        extra_vars=extra_vars,
+    )
 
 
@@ -102,6 +140,7 @@ def set_resource_metadata(update_dict):
     # better fix
 
     q = model.Session.query(model.Resource). \
+        with_for_update(of=model.Resource). 
\ filter(model.Resource.id == update_dict['resource_id']) resource = q.one() @@ -110,12 +149,6 @@ def set_resource_metadata(update_dict): extras.update(update_dict) q.update({'extras': extras}, synchronize_session=False) - # TODO: Remove resource_revision_table when dropping support for 2.8 - if hasattr(model, 'resource_revision_table'): - model.Session.query(model.resource_revision_table).filter( - model.ResourceRevision.id == update_dict['resource_id'], - model.ResourceRevision.current is True - ).update({'extras': extras}, synchronize_session=False) model.Session.commit() # get package with updated resource from solr @@ -177,7 +210,7 @@ def headers_guess(rows, tolerance=1): return 0, [] -TYPES = [int, bool, str, datetime.datetime, float, Decimal] +TYPES = [int, bool, str, binary_type, datetime.datetime, float, Decimal] def type_guess(rows, types=TYPES, strict=False): @@ -203,10 +236,10 @@ def type_guess(rows, types=TYPES, strict=False): for ci, cell in enumerate(row): if not cell: continue - at_least_one_value[ci] = True for type in list(guesses[ci].keys()): if not isinstance(cell, type): guesses[ci].pop(type) + at_least_one_value[ci] = True if guesses[ci] else False # no need to set guessing weights before this # because we only accept a type if it never fails for i, guess in enumerate(guesses): @@ -238,5 +271,17 @@ def type_guess(rows, types=TYPES, strict=False): # element in case of a tie # See: http://stackoverflow.com/a/6783101/214950 guesses_tuples = [(t, guess[t]) for t in types if t in guess] + if not guesses_tuples: + raise JobError('Failed to guess types') _columns.append(max(guesses_tuples, key=lambda t_n: t_n[1])[0]) return _columns + + +def datastore_resource_exists(resource_id): + context = {'model': model, 'ignore_auth': True} + try: + response = p.toolkit.get_action('datastore_search')(context, dict( + id=resource_id, limit=0)) + except p.toolkit.ObjectNotFound: + return False + return response or {'fields': []} diff --git a/ckanext/xloader/views.py b/ckanext/xloader/views.py index e9357d3c..1af9f1fd 100644 --- a/ckanext/xloader/views.py +++ b/ckanext/xloader/views.py @@ -1,5 +1,7 @@ from flask import Blueprint +from ckan.plugins.toolkit import _, h, g, render, request, abort, NotAuthorized, get_action, ObjectNotFound + import ckanext.xloader.utils as utils @@ -12,7 +14,55 @@ def get_blueprints(): @xloader.route("/dataset//resource_data/", methods=("GET", "POST")) def resource_data(id, resource_id): - return utils.resource_data(id, resource_id) + rows = request.args.get('rows') + if rows: + try: + rows = int(rows) + if rows < 0: + rows = None + except ValueError: + rows = None + return utils.resource_data(id, resource_id, rows) + + +@xloader.route("/dataset//delete-datastore/", methods=("GET", "POST")) +def delete_datastore_table(id, resource_id): + if u'cancel' in request.form: + return h.redirect_to(u'xloader.resource_data', id=id, resource_id=resource_id) + + context = {"user": g.user} + + try: + res_dict = get_action('resource_show')(context, {"id": resource_id}) + if res_dict.get('package_id') != id: + raise ObjectNotFound + except ObjectNotFound: + return abort(404, _(u'Resource not found')) + + if request.method == 'POST': + try: + get_action('datastore_delete')(context, { + "resource_id": resource_id, + "force": True}) + except NotAuthorized: + return abort(403, _(u'Unauthorized to delete resource %s') % resource_id) + + h.flash_notice(_(u'DataStore and Data Dictionary deleted for resource %s') % resource_id) + + return h.redirect_to( + 'xloader.resource_data', + 
id=id,
+            resource_id=resource_id
+        )
+    else:
+        g.resource_id = resource_id
+        g.package_id = id
+
+        extra_vars = {
+            u"resource_id": resource_id,
+            u"package_id": id
+        }
+        return render(u'xloader/confirm_datastore_delete.html', extra_vars)
 
 
 @xloader.route("/dataset/<id>/unsupported_format/<resource_id>")
 def unsupported_format(id, resource_id):
diff --git a/full_text_function.sql b/full_text_function.sql
deleted file mode 100644
index 8a604258..00000000
--- a/full_text_function.sql
+++ /dev/null
@@ -1,16 +0,0 @@
--- _full_text fields are now updated by a trigger when set to NULL
--- copied from https://github.com/ckan/ckan/pull/3786/files#diff-33d20faeb53559a9b8940bcb418cb5b4R75
-CREATE OR REPLACE FUNCTION populate_full_text_trigger() RETURNS trigger
-AS $body$
-    BEGIN
-        IF NEW._full_text IS NOT NULL THEN
-            RETURN NEW;
-        END IF;
-        NEW._full_text := (
-            SELECT to_tsvector(string_agg(value, ' '))
-            FROM json_each_text(row_to_json(NEW.*))
-            WHERE key NOT LIKE '\_%');
-        RETURN NEW;
-    END;
-$body$ LANGUAGE plpgsql;
-ALTER FUNCTION populate_full_text_trigger() OWNER TO ckan_default;
diff --git a/requirements.txt b/requirements.txt
index 58540beb..fe92b6d7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ six>=1.12.0
 tabulator==1.53.5
 Unidecode==1.0.22
 python-dateutil>=2.8.2
+chardet==5.2.0
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 7d7eea80..6bdefb0b 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@
     # Versions should comply with PEP440. For a discussion on single-sourcing
     # the version across setup.py and the project code, see
     # http://packaging.python.org/en/latest/tutorial.html#version
-    version='0.12.2',
+    version='1.0.1',
 
     description='Express Loader - quickly load data into CKAN DataStore',
     long_description=long_description,
@@ -38,12 +38,11 @@
     # Pick your license as you wish (should match "license" above)
     'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',
 
-    # Specify the Python versions you support here. In particular, ensure
-    # that you indicate whether you support Python 2, Python 3 or both.
-    'Programming Language :: Python :: 2.7',
-    'Programming Language :: Python :: 3.6',
+    # Specify the Python versions you support here.
     'Programming Language :: Python :: 3.7',
     'Programming Language :: Python :: 3.8',
+    'Programming Language :: Python :: 3.9',
+    'Programming Language :: Python :: 3.10',
 ],
 
 
@@ -85,10 +84,6 @@
     [babel.extractors]
     ckan = ckan.lib.extract:extract_ckan
 
-    [paste.paster_command]
-    xloader = ckanext.xloader.paster:xloaderCommand
-    migrate_types = ckanext.xloader.paster:MigrateTypesCommand
-
     ''',
 
     # If you are changing from the default layout of your extension, you may
diff --git a/test.ini b/test.ini
index 1415d37f..7bfab684 100644
--- a/test.ini
+++ b/test.ini
@@ -15,7 +15,7 @@ use = config:../ckan/test-core.ini
 # Insert any custom config settings to be used when running your extension's
 # tests here.
 
-ckan.plugins = xloader
+ckan.plugins = xloader datastore
 ckanext.xloader.jobs_db.uri = sqlite:////tmp/jobs.db
 
 # Logging configuration
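Reviewer note: the test_parser.py changes above swap the custom CSV parser class for tabulator's `post_parse` hook. For anyone unfamiliar with that hook, here is a minimal sketch of the processor contract that `TypeConverter().convert_types` satisfies; `normalise_cells` and `data.csv` are made-up names for illustration, not part of this patch:

    from tabulator import Stream

    def normalise_cells(extended_rows):
        # A tabulator post-parse processor receives an iterator of
        # (row_number, headers, row) tuples and must yield tuples of
        # the same shape; TypeConverter plugs into this same hook.
        for row_number, headers, row in extended_rows:
            yield row_number, headers, [
                cell.strip() if isinstance(cell, str) else cell
                for cell in row
            ]

    # 'data.csv' is a placeholder path.
    with Stream('data.csv', format='csv', post_parse=[normalise_cells]) as stream:
        print(stream.read())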
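The `XLoaderFormats` helper added to ckanext/xloader/utils.py caches its whitelist on first use. A rough usage sketch, assuming it runs inside a loaded CKAN app context with `ckanext.xloader.formats` unset (so `DEFAULT_FORMATS` applies):

    from ckanext.xloader.utils import XLoaderFormats

    # Matching is case-insensitive; empty or missing formats are rejected.
    assert XLoaderFormats.is_it_an_xloader_format("CSV")
    assert XLoaderFormats.is_it_an_xloader_format("application/vnd.ms-excel")
    assert not XLoaderFormats.is_it_an_xloader_format("pdf")
    assert not XLoaderFormats.is_it_an_xloader_format(None)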
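Finally, the new `delete_datastore_table` view is a thin wrapper around CKAN's existing `datastore_delete` action, called with `force: True`. A sketch of the equivalent direct API call, with placeholder URL, token, and resource id:

    import requests  # assumption: any HTTP client would do

    resp = requests.post(
        "https://ckan.example.org/api/3/action/datastore_delete",
        json={"resource_id": "<resource-id>", "force": True},
        headers={"Authorization": "<api-token>"},
    )
    resp.raise_for_status()  # the action responds with {"success": true, ...}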