Skip to content

Commit

Permalink
Merge pull request #615 from dbic/enh-anon-cmd
Browse files Browse the repository at this point in the history
Add a bash anon-cmd to be used to incrementally anonymize sids
  • Loading branch information
yarikoptic authored Feb 17, 2023
2 parents e0ff32d + df3811f commit be8e5ff
Showing 1 changed file with 54 additions and 0 deletions.
54 changes: 54 additions & 0 deletions utils/anon-cmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/bash
# anon-cmd -- generic anonymization helper.
#
# Given a subject id (sid) as the first argument, print its anonymized
# counterpart on stdout, based on what the script had seen in the past or
# simply on what the translation dictionary already contains.
#
# Environment:
#   AC_ANON_FILE  path of the CSV translation dictionary
#   AC_ANON_FMT   printf format of anonymized ids

# Abort on the first failing command and on any use of an unset variable.
set -o errexit -o nounset

# Emit a diagnostic message on stderr, but only when AC_DEBUG is set to a
# non-empty value; by default nothing is printed.
# Arguments: the words of the message (joined with single spaces).
# Outputs:   "DEBUG: <message>" on stderr; never writes to stdout, which is
#            reserved for the anonymized id.
# Returns:   0 when tracing is off, so calls are safe under `set -e`.
debug() {
  # An `if` (not `[ ... ] && ...`) keeps the exit status 0 when disabled.
  if [ -n "${AC_DEBUG:-}" ]; then
    printf '%s\n' "DEBUG: $*" >&2
  fi
}

# Translation file location.
# Stored under .git by default to guarantee that it is not committed or locked
# by git-annex etc.  That might not fit use cases where there is no .git; set
# AC_ANON_FILE to point elsewhere then.
anon_file_default=$(dirname -- "$0")/../.git/anon_sid_map.csv
anon_file="${AC_ANON_FILE:-$anon_file_default}"
# printf format used to render (and to recognize) anonymized ids.
anon_fmt="${AC_ANON_FMT:-%03d}"

# Give a usage hint instead of a bare "unbound variable" error from `set -u`.
if [ "$#" -lt 1 ] || [ -z "$1" ]; then
  echo "Usage: ${0##*/} <sid>" >&2
  exit 1
fi
sid="$1"

# Harmonize case so that lookups are case-insensitive.  Done up front with tr
# since elderly awk on rolando seems to have no clue about IGNORECASE.
sid=$(printf '%s' "$sid" | tr '[:lower:]' '[:upper:]')

debug "Using $anon_file to map $sid"

if [ ! -e "$anon_file" ]; then
  touch -- "$anon_file"  # initiate it
fi

# Apparently heudiconv passes even those ids we provided in the `-s` CLI option
# to the anonymization script.  So we have to match those against our format
# and give them back unchanged if they match.  That forbids plain remapping
# when original ids are in the same format, so some folks might want to
# disable that!
#
# Strip leading zeros first so the value is not rejected or misread by printf;
# an all-zero sid becomes 0.
sid_nozeros=${sid#"${sid%%[!0]*}"}
# A failing printf (e.g. a non-numeric sid with a %d format) simply means
# "not in the anonymized format"; its diagnostics are deliberately dropped.
# shellcheck disable=SC2059
if sid_input_fmted=$(printf -- "$anon_fmt" "${sid_nozeros:-0}" 2>/dev/null) \
  && [ "$sid" = "$sid_input_fmted" ]; then
  debug already in the anonymized format
  printf '%s\n' "$sid"
  exit 0
fi

# Look for the first "<sid>,..." line.  The sid is matched literally (the
# quoted case pattern), so characters such as '.' or '*' in a sid cannot
# match somebody else's entry.
found=
ann=
while IFS= read -r line || [ -n "$line" ]; do
  case "$line" in
    "$sid",*)
      ann=${line##*,}  # anonymized id is the last comma-separated field
      found=1
      debug "Found $ann in '$line'"
      break
      ;;
  esac
done < "$anon_file"

if [ -z "$found" ]; then
  # NOTE(review): this branch used to exit unconditionally, which left the
  # allocation code below unreachable.  Allocation is now opt-in through
  # AC_ANON_ALLOW_NEW so that the default keeps refusing unknown sids.
  if [ -z "${AC_ANON_ALLOW_NEW:-}" ]; then
    echo "We have all sids mapped already! Will not create a new one for $sid" >&2
    exit 1
  fi

  # A comma inside the sid would corrupt the CSV and confuse later lookups.
  case "$sid" in
    *,*)
      echo "Cannot store sid containing a comma: $sid" >&2
      exit 1
      ;;
  esac

  # Need to take the latest one: the numerically largest last column, with
  # leading zeros removed so that $(( )) does not read it as octal.
  largest=$(sed -e 's/.*,//' < "$anon_file" | sort -n | tail -n 1)
  largest=${largest#"${largest%%[!0]*}"}
  case "$largest" in
    *[!0-9]*)
      echo "Cannot derive the next id from non-numeric '$largest' in $anon_file" >&2
      exit 1
      ;;
  esac
  next=$(( ${largest:-0} + 1 ))
  # shellcheck disable=SC2059
  ann=$(printf -- "$anon_fmt" "$next")
  debug "Found $largest and $next to get $ann, storing"
  printf '%s,%s\n' "$sid" "$ann" >> "$anon_file"
fi
printf '%s\n' "$ann"

0 comments on commit be8e5ff

Please sign in to comment.