Skip to content

Commit

Permalink
OY-4854 Switch to csv format
Browse files (browse the repository at this point in the history)
  • Loading branch information
tomikat committed Jan 24, 2025
1 parent 15e2022 commit 7c38855
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 35 deletions.
2 changes: 2 additions & 0 deletions dev-resources/anonymized-persons.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
personOid,etunimet,kutsumanimi,sukunimi,kansalaisuus,hetu,syntymaaika,sukupuoli,sahkopostiosoite,puhelinnumero,asuinmaa,katuosoite,postinumero,postitoimipaikka,kotikunta,aidinkieli
1.2.246.562.24.25601805074,Testi Henkilö,Testi,Virtanen-Testi,246,090296-999D,09.02.1996,1,hakija-47904641@oph.fi,050 11581851,Suomi,Metelitie 393,00200,Helsinki,091,fi
1 change: 0 additions & 1 deletion dev-resources/anonymized-persons.json

This file was deleted.

69 changes: 41 additions & 28 deletions dev/clj/ataru/anonymizer/core.clj
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
(ns ataru.anonymizer.core
(:require [ataru.anonymizer.anonymizer-application-store :as application-store]
[cheshire.core :as json]
[clojure.data.csv :as csv]
[clojure.java.io :as io]
clojure.string
clojure.walk
[taoensso.timbre :as log]))
Expand Down Expand Up @@ -49,44 +50,56 @@
{})
:content (update (:content application) :answers #(map anonymize-answer %))})))

;; Converts one fake-person record (a map keyed by the column names of the
;; source file) into a map keyed by the field names ataru uses.
;;
;; NOTE(review): this is a diff view whose +/- markers were lost, so lines of
;; the removed and of the added definition are interleaved below.
;;   * Removed version: begins at the first `defn` line; builds :first-name
;;     from etunimi and toinennimi, reads :address from lahiosoite and
;;     hard-codes :postal-code "00100", :postal-office "HELSINKI" and
;;     :home-town "091".
;;   * Added version: begins at the second `defn` line; every output value is
;;     taken from a destructured key (personOid ... aidinkieli), and it adds
;;     :nationality, :country-of-residence and :language.
(defn fake-person->ataru-person [{:keys [sukupuoli
toinennimi
syntymaaika
sahkopostiosoite
(defn fake-person->ataru-person [{:keys [personOid
etunimet
kutsumanimi
sukunimi
kansalaisuus
hetu
etunimi
syntymaaika
sukupuoli
sahkopostiosoite
puhelinnumero
personOid
lahiosoite]}]
;; Result map of the removed version (uses etunimi / toinennimi / lahiosoite).
{:person-oid personOid
:fake-ssn hetu
:address lahiosoite
:email sahkopostiosoite
:last-name sukunimi
:phone puhelinnumero
:first-name (str etunimi " " toinennimi)
:preferred-name etunimi
:postal-code "00100"
:postal-office "HELSINKI"
:home-town "091"
:gender sukupuoli
:birth-date syntymaaika})
;; Remaining destructured keys of the added version.
asuinmaa
katuosoite
postinumero
postitoimipaikka
kotikunta
aidinkieli]}]
;; Result map of the added version: a one-to-one rename of the source keys.
{:person-oid personOid
:first-name etunimet
:preferred-name kutsumanimi
:last-name sukunimi
:nationality kansalaisuus
:fake-ssn hetu
:birth-date syntymaaika
:gender sukupuoli
:email sahkopostiosoite
:phone puhelinnumero
:country-of-residence asuinmaa
:address katuosoite
:postal-code postinumero
:postal-office postitoimipaikka
:home-town kotikunta
:language aidinkieli})

;; Loads the fake persons from `file`, converts each record with
;; fake-person->ataru-person and returns them grouped by :person-oid
;; (a map of person oid -> vector of converted persons). Logs
;; "Indexing persons" first; the work is wrapped in `time`.
;;
;; NOTE(review): this is a diff view whose +/- markers were lost, so the
;; removed and the added pipeline both appear inside the body below.
(defn file->fake-persons [file]
(log/info "Indexing persons")
(time
;; Removed version: slurps the whole file and parses each line as one JSON
;; object, keywordizing its keys before conversion.
(->> file
(slurp)
(clojure.string/split-lines)
(map (comp fake-person->ataru-person
clojure.walk/keywordize-keys
json/parse-string))
(group-by :person-oid))))
;; Added version: reads the file as CSV. The first row is turned into
;; keywords and zipped with every following row, giving one map per record.
;; group-by sits inside with-open; presumably it consumes the lazy CSV rows
;; before the reader closes -- confirm against the group-by / data.csv docs.
(with-open [reader (io/reader file)]
(group-by :person-oid
(->> (let [data (csv/read-csv reader)]
(map zipmap
(->> (first data)
(map keyword)
repeat)
(rest data)))
(map fake-person->ataru-person))))))

(defn anonymize-data [& args]
(assert (not (clojure.string/blank? (second args))))
(let [fake-persons (file->fake-persons (first args))
_ (log/info "Found" (count (keys fake-persons)) "persons")
attachment-key (second args)
application-ids (application-store/get-all-application-ids)
last-id (last application-ids)]
Expand Down
2 changes: 2 additions & 0 deletions project.clj
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@
:silent false}

:profiles {:dev {:dependencies [[cider/piggieback "0.5.3"]
[org.clojure/data.csv "1.1.0"]
[figwheel-sidecar "0.5.20"]
[snipsnap "0.2.0" :exclusions [org.clojure/clojure]]
[reloaded.repl "0.2.4" :exclusions [org.clojure/tools.namespace]]
Expand All @@ -305,6 +306,7 @@
:aws-secret-key "localhost"}}

:test {:dependencies [[cider/piggieback "0.5.3"]
[org.clojure/data.csv "1.1.0"]
[figwheel-sidecar "0.5.20"]
[snipsnap "0.2.0" :exclusions [org.clojure/clojure]]
[reloaded.repl "0.2.4" :exclusions [org.clojure/tools.namespace]]
Expand Down
12 changes: 6 additions & 6 deletions spec/ataru/anonymizer/anonymizer_spec.clj
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,13 @@
:value "1"
:fieldType "textField"}
{:key "first-name"
:value "Måns Testi"
:value "Testi Henkilö"
:fieldType "textField"}
{:key "preferred-name"
:value "Måns"
:value "Testi"
:fieldType "textField"}
{:key "last-name"
:value "Sarkkinen-Testi"
:value "Virtanen-Testi"
:fieldType "textField"}
{:key "address"
:value "Metelitie 393"
Expand All @@ -110,13 +110,13 @@
:value "hakija-47904641@oph.fi"
:fieldType "textField"}
{:key "postal-code"
:value "00100"
:value "00200"
:fieldType "textField"}
{:key "birth-date"
:value "09.02.1996"
:fieldType "textField"}
{:key "postal-office"
:value "HELSINKI"
:value "Helsinki"
:fieldType "textField"}
{:key "home-town"
:value "091"
Expand Down Expand Up @@ -187,7 +187,7 @@
(create-answer {:value ["huoltajaY@oph.fi"] :key "guardian-email-secondary"})]))
(it "should anonymize application"
(let [initial-application (store/get-application @test-application-id)]
(core/anonymize-data "dev-resources/anonymized-persons.json" "f996b389-2f36-4ba2-8139-6a7acefe0e3e" true)
(core/anonymize-data "dev-resources/anonymized-persons.csv" "f996b389-2f36-4ba2-8139-6a7acefe0e3e" true)
(let [anonymized-application (store/get-application @test-application-id)]
(should= (expected-anonymized-application)
(-> anonymized-application
Expand Down

0 comments on commit 7c38855

Please sign in to comment.