Skip to content

Commit

Permalink
added structure for supplying extra json records with (optional) file…
Browse files Browse the repository at this point in the history
… metadata

for the uploaded sample files. (#5)
  • Loading branch information
landreev committed Oct 7, 2019
1 parent 7d00b37 commit cb0f21f
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 2 deletions.
15 changes: 13 additions & 2 deletions create_sample_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def check_dataset_lock(dataset_dbid):
dataset_pid = resp.json()['data']['persistentId']
dataset_dbid = resp.json()['data']['id']
files_dir = path.replace(json_file, '') + 'files'
filemetadata_dir = path.replace(json_file, '') + '.filemetadata'
print(files_dir)
for path,subdir,files in os.walk(files_dir):
for name in files:
Expand All @@ -73,9 +74,19 @@ def check_dataset_lock(dataset_dbid):
## This lock check and sleep is here to prevent the dataset from being permanently
## locked because a tabular file was uploaded first.
check_dataset_lock(dataset_dbid)
file_metadata = {}
# TODO: Think more about where the description comes from. A "sidecar" file as proposed at https://github.com/IQSS/dataverse/issues/5924#issuecomment-499605672 ?
#file_metadata['description'] = 'Sidecar?'
# L.A.: I implemented something along these lines: an optional directory called ".filemetadata"
# in the dataset directory, which may hold, for each of the files in the "files" directory,
# a file containing an extra JSON filemetadata record (looked up at .filemetadata/<filename>).
# (Since file names must be unique per dataset, even with folders, the .filemetadata
# directory structure is flat.)
# check for optional filemetadata file:
filemetadatapath = os.path.join(filemetadata_dir, filename);
if (os.path.exists(filemetadatapath)):
with open(filemetadatapath) as m:
file_metadata = json.load(m)
else:
file_metadata = {}
file_metadata['directoryLabel'] = directoryLabel
jsonData = json.dumps(file_metadata)
data = { 'jsonData' : jsonData }
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"description": "The tabular file contains information on known Harvard repositories on GitHub, such as the number of stars, programming language, day last updated, number of open issues, size, number of forks, repository URL, create date, and description."
}

0 comments on commit cb0f21f

Please sign in to comment.