-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #27 from hubmapconsortium/icaoberg-metadata-fix
Metadata fixes
- Loading branch information
Showing
5 changed files
with
167 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,10 @@ | ||
<img src="images/logo.png" /> | ||
|
||
Python package that creates a submission for dbGaP. | ||
This Python package automates the process of preparing and validating data from the HuBMAP project for submission to dbGaP. It helps format metadata and data into the required structure, ensuring compliance with dbGaP's submission guidelines for genomic and phenotype data. | ||
|
||
For more information, visit [HuBMAP](https://www.hubmapconsortium.org/) and [dbGaP](https://www.ncbi.nlm.nih.gov/gap). | ||
|
||
--- | ||
Copyright © 2020-2025 Pittsburgh Supercomputing Center. All Rights Reserved. | ||
|
||
Developed by the [Biomedical Applications Group](https://www.psc.edu/biomedical-applications/) at the [Pittsburgh Supercomputing Center](http://www.psc.edu) in the [Mellon College of Science](https://www.cmu.edu/mcs/) at [Carnegie Mellon University](http://www.cmu.edu). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "40798f90-c6d4-418f-832b-390c711b306e", | ||
"metadata": {}, | ||
"source": [ | ||
"# phs002267.v1 - CalTech" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "65f08289-536a-4511-8190-b2aecb20a92d", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import hubmapdbgap\n", | ||
"import hubmapbags\n", | ||
"import requests\n", | ||
"import pandas as pd\n", | ||
"from pprint import pprint\n", | ||
"\n", | ||
"token = 'TOKEN'\n", | ||
"\n", | ||
"def get_hubmap_ids(group_name: str) -> dict:\n", | ||
" url = \"https://search.api.hubmapconsortium.org/v3/search\"\n", | ||
"\n", | ||
" headers = {\"Accept\": \"application/json\"}\n", | ||
"\n", | ||
" body = {\n", | ||
" \"size\": 500,\n", | ||
" \"_source\": {\n", | ||
" \"include\": [\"hubmap_id\", \"uuid\", \"group_name\", \"dataset_type\", \"status\", \"data_types\",\"contains_human_genetic_sequences\"]\n", | ||
" },\n", | ||
" \"query\": {\n", | ||
" \"bool\": {\n", | ||
" \"must\": [{\"match_phrase\": {\"group_name\": group_name}}],\n", | ||
" \"filter\": [{\"match\": {\"entity_type\": \"Dataset\"}}],\n", | ||
" }\n", | ||
" },\n", | ||
" }\n", | ||
"\n", | ||
" answer = requests.post(url=url, headers=headers, json=body).json()\n", | ||
"\n", | ||
" if \"error\" in answer.keys():\n", | ||
" warning(answer[\"error\"])\n", | ||
" return None\n", | ||
"\n", | ||
" data = answer[\"hits\"][\"hits\"]\n", | ||
"\n", | ||
" results = []\n", | ||
" for datum in data:\n", | ||
" results.append(\n", | ||
" {\n", | ||
" \"uuid\": datum[\"_source\"][\"uuid\"],\n", | ||
" \"hubmap_id\": datum[\"_source\"][\"hubmap_id\"],\n", | ||
" \"status\": datum[\"_source\"][\"status\"],\n", | ||
" \"dataset_type\": datum[\"_source\"][\"dataset_type\"],\n", | ||
" \"is_protected\": datum[\"_source\"][\"contains_human_genetic_sequences\"],\n", | ||
" }\n", | ||
" )\n", | ||
" \n", | ||
" return results\n", | ||
"\n", | ||
"group_name = 'California Institute of Technology TMC'\n", | ||
"dbgap_study_id = 'phs002267'\n", | ||
"data = get_hubmap_ids(group_name)\n", | ||
"df = pd.DataFrame(data)\n", | ||
"df = df[(df[\"is_protected\"]==True) & (df[\"status\"]==\"Published\")]\n", | ||
"\n", | ||
"hubmap_ids = list(df['hubmap_id'])\n", | ||
"print(f'List of total datasets to include in study is {len(hubmap_ids)}')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "c5d85dd1-30df-4ac6-9d77-90629f97f273", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"data = hubmapdbgap.create.submission(hubmap_ids, dbgap_study_id=dbgap_study_id, token=token, prepend_sample_id=True )" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.7" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,27 @@ | ||
from setuptools import setup | ||
from setuptools import setup, find_packages | ||
|
||
setup( | ||
name="hubmap-dbgap", | ||
version="1.0", | ||
version="2025.01", | ||
description="Generates submission for dbGaP", | ||
url="https://github.com/hubmapconsortium/py-hubmap-dbgap", | ||
author="Ivan Cao-Berg, Gesina Phillips", | ||
author_email="icaoberg@psc.edu", | ||
license="MIT", # Add a license (you can change this if needed) | ||
install_requires=[ | ||
"pandas", | ||
"numpy", | ||
"tabulate", | ||
"tqdm", | ||
], | ||
packages=["hubmapdbgap"], | ||
packages=find_packages(), # This will automatically find your package directories | ||
classifiers=[ # These help users find your package based on its usage | ||
"Programming Language :: Python :: 3", | ||
"License :: OSI Approved :: MIT License", # Make sure this matches your license | ||
"Operating System :: OS Independent", | ||
], | ||
python_requires=">=3.7", # Adjust according to your code's Python version compatibility | ||
long_description_content_type="text/markdown", # If you use a README.md file | ||
long_description=open('README.md').read(), # This reads your README file (make sure you have one) | ||
) | ||
|