Skip to content
This repository has been archived by the owner on Jul 29, 2020. It is now read-only.

Metadata sync #244

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ before_install:
# Update conda itself
- conda update --yes conda
install:
- travis_retry conda create --yes -n labadmin python=2.7 pip
- travis_retry conda create --yes -n labadmin python=2.7 pip xlrd
- source activate labadmin
- pip install -U pip
- pip install -U click natsort coverage coveralls
Expand Down
33 changes: 32 additions & 1 deletion knimin/handlers/ag_pulldown.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from future.utils import viewitems
from StringIO import StringIO
import pandas as pd
import csv
import numpy as np

from knimin.handlers.base import BaseHandler
from knimin import db
Expand Down Expand Up @@ -99,7 +101,22 @@ def post(self):
# transform each survey into a pandas dataframe for later merge
# read all columns as string to avoid unintended conversions,
# like cutting leading zeros of barcodes
pd_meta = pd.read_csv(StringIO(meta), sep="\t", dtype=str)

f = open('/tmp/foo%d' % survey, 'w')
f.write(meta)
f.close()

pd_meta = pd.read_csv(StringIO(meta), sep="\t",
encoding='iso-8859-1',
quoting=csv.QUOTE_NONE,
dtype=unicode)

# these are replicated and useless anyway
if 'ALTITUDE' in pd_meta.columns:
pd_meta.drop(['ALTITUDE'], axis=1, inplace=True)
if 'DEPTH' in pd_meta.columns:
pd_meta.drop(['DEPTH'], axis=1, inplace=True)

# use the barcodes (stored in the 'sample_name' column) as the index
pd_meta.set_index('sample_name', inplace=True)
results_as_pd.append(pd_meta)
Expand All @@ -108,9 +125,23 @@ def post(self):
if self.get_argument('merged', default='False') == 'True':
pd_all = pd.DataFrame()
if len(results_as_pd) > 0:
cols = set()
for df in results_as_pd:
if set(df.columns).intersection(cols):
df.drop(set(df.columns).intersection(cols), axis=1, inplace=True)
cols.update(set(df.columns))
pd_all = pd.concat(results_as_pd, join='outer', axis=1)
pd_all['DEPTH'] = 'Not applicable'
pd_all['ALTITUDE'] = 'Not applicable'
pd_all['HAS_PHYSICAL_SPECIMEN'] = 'Not applicable'
pd_all.fillna('Not provided', inplace=True)
if 'ASSIGNED_FROM_GEO.1' in pd_all.columns:
pd_all.drop(['ASSIGNED_FROM_GEO.1'], axis=1, inplace=True)

meta_zip.append('surveys_merged_md.txt',
pd_all.to_csv(sep='\t',
quoting=csv.QUOTE_NONE,
encoding='iso-8859-1',
index_label='sample_name'))

# write out zip file
Expand Down
54 changes: 27 additions & 27 deletions knimin/lib/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,17 +151,17 @@
'BODY_HABITAT': 'UBERON:hair',
'ENV_MATERIAL': 'sebum',
'ENV_PACKAGE': 'human-associated',
'DESCRIPTION': 'American Gut Project Hair sample',
'DESCRIPTION': 'American Gut Project Hair Sample',
'BODY_SITE': 'UBERON:hair'},
'Nares': {
'BODY_PRODUCT': 'UBERON:mucus',
'SAMPLE_TYPE': 'Nares',
'SCIENTIFIC_NAME': 'human nasal/pharyngeal metagenome',
'SCIENTIFIC_NAME': 'human nasopharyngeal metagenome',
'TAXON_ID': '1131769',
'BODY_HABITAT': 'UBERON:nose',
'ENV_MATERIAL': 'mucus',
'ENV_PACKAGE': 'human-skin',
'DESCRIPTION': 'American Gut Project Nares sample',
'ENV_PACKAGE': 'human-associated',
'DESCRIPTION': 'American Gut Project Nares Sample',
'BODY_SITE': 'UBERON:nostril'},
'Vaginal mucus': {
'BODY_PRODUCT': 'UBERON:mucus',
Expand All @@ -171,7 +171,7 @@
'BODY_HABITAT': 'UBERON:vagina',
'ENV_MATERIAL': 'mucus',
'ENV_PACKAGE': 'human-vaginal',
'DESCRIPTION': 'American Gut Project Vaginal mucus sample',
'DESCRIPTION': 'American Gut Project Vaginal Mucus Sample',
'BODY_SITE': 'UBERON:vaginal introitus'},
'Sole of foot': {
'BODY_PRODUCT': 'UBERON:sebum',
Expand All @@ -181,17 +181,17 @@
'BODY_HABITAT': 'UBERON:skin',
'ENV_MATERIAL': 'sebum',
'ENV_PACKAGE': 'human-skin',
'DESCRIPTION': 'American Gut Project Sole of foot sample',
'DESCRIPTION': 'American Gut Project Sole of Foot Sample',
'BODY_SITE': 'UBERON:skin of foot'},
'Nasal mucus': {
'BODY_PRODUCT': 'UBERON:mucus',
'SAMPLE_TYPE': 'Mucus',
'SCIENTIFIC_NAME': 'human nasal/pharyngeal metagenome',
'SAMPLE_TYPE': 'Nares',
'SCIENTIFIC_NAME': 'human nasopharyngeal metagenome',
'TAXON_ID': '1131769',
'BODY_HABITAT': 'UBERON:nose',
'ENV_MATERIAL': 'mucus',
'ENV_PACKAGE': 'human-associated',
'DESCRIPTION': 'American Gut Project Nasal mucus sample',
'DESCRIPTION': 'American Gut Project Nares Sample',
'BODY_SITE': 'UBERON:nostril'},
'Stool': {
'BODY_PRODUCT': 'UBERON:feces',
Expand All @@ -201,7 +201,7 @@
'BODY_HABITAT': 'UBERON:feces',
'ENV_MATERIAL': 'feces',
'ENV_PACKAGE': 'human-gut',
'DESCRIPTION': 'American Gut Project Stool sample',
'DESCRIPTION': 'American Gut Project Stool Sample',
'BODY_SITE': 'UBERON:feces'},
'Forehead': {
'BODY_PRODUCT': 'UBERON:sebum',
Expand All @@ -211,7 +211,7 @@
'BODY_HABITAT': 'UBERON:skin',
'ENV_MATERIAL': 'sebum',
'ENV_PACKAGE': 'human-skin',
'DESCRIPTION': 'American Gut Project Forehead sample',
'DESCRIPTION': 'American Gut Project Forehead Sample',
'BODY_SITE': 'UBERON:skin of head'},
'Tears': {
'BODY_PRODUCT': 'UBERON:tears',
Expand All @@ -221,7 +221,7 @@
'BODY_HABITAT': 'UBERON:eye',
'ENV_MATERIAL': 'tears',
'ENV_PACKAGE': 'human-associated',
'DESCRIPTION': 'American Gut Project Tears sample',
'DESCRIPTION': 'American Gut Project Tears Sample',
'BODY_SITE': 'UBERON:eye'},
'Right hand': {
'BODY_PRODUCT': 'UBERON:sebum',
Expand All @@ -231,7 +231,7 @@
'BODY_HABITAT': 'UBERON:skin',
'ENV_MATERIAL': 'sebum',
'ENV_PACKAGE': 'human-skin',
'DESCRIPTION': 'American Gut Project Right Hand sample',
'DESCRIPTION': 'American Gut Project Right Hand Sample',
'BODY_SITE': 'UBERON:skin of hand'},
'Torso': {
'BODY_PRODUCT': 'UBERON:sebum',
Expand All @@ -240,7 +240,7 @@
'TAXON_ID': '539655',
'BODY_HABITAT': 'UBERON:skin',
'ENV_MATTER': 'sebum',
'DESCRIPTION': 'American Gut Project torso sample',
'DESCRIPTION': 'American Gut Project Torso Sample',
'BODY_SITE': 'UBERON:skin of trunk'},
'Left leg': {
'BODY_PRODUCT': 'UBERON:sebum',
Expand All @@ -249,7 +249,7 @@
'TAXON_ID': '539655',
'BODY_HABITAT': 'UBERON:skin',
'ENV_MATTER': 'sebum',
'DESCRIPTION': 'American Gut Project left leg sample',
'DESCRIPTION': 'American Gut Project Left Leg Sample',
'BODY_SITE': 'UBERON:skin of leg'},
'Right leg': {
'BODY_PRODUCT': 'UBERON:sebum',
Expand All @@ -258,7 +258,7 @@
'TAXON_ID': '539655',
'BODY_HABITAT': 'UBERON:skin',
'ENV_MATTER': 'sebum',
'DESCRIPTION': 'American Gut Project right leg sample',
'DESCRIPTION': 'American Gut Project Right Leg Sample',
'BODY_SITE': 'UBERON:skin of leg'},
'Mouth': {
'BODY_PRODUCT': 'UBERON:saliva',
Expand All @@ -268,7 +268,7 @@
'BODY_HABITAT': 'UBERON:oral cavity',
'ENV_MATERIAL': 'saliva',
'ENV_PACKAGE': 'human-oral',
'DESCRIPTION': 'American Gut Project Mouth sample',
'DESCRIPTION': 'American Gut Project Mouth Sample',
'BODY_SITE': 'UBERON:tongue'},
'Left hand': {
'BODY_PRODUCT': 'UBERON:sebum',
Expand All @@ -278,7 +278,7 @@
'BODY_HABITAT': 'UBERON:skin',
'ENV_MATERIAL': 'sebum',
'ENV_PACKAGE': 'human-skin',
'DESCRIPTION': 'American Gut Project Left Hand sample',
'DESCRIPTION': 'American Gut Project Left Hand Sample',
'BODY_SITE': 'UBERON:skin of hand'},
'Ear wax': {
'BODY_PRODUCT': 'UBERON:cerumen',
Expand All @@ -288,7 +288,7 @@
'BODY_HABITAT': 'UBERON:ear',
'ENV_MATERIAL': 'ear wax',
'ENV_PACKAGE': 'human-associated',
'DESCRIPTION': 'American Gut Project Ear wax sample',
'DESCRIPTION': 'American Gut Project Ear Wax Sample',
'BODY_SITE': 'UBERON:external auditory meatus'}
}

Expand Down Expand Up @@ -490,7 +490,7 @@ def default_blank():


blanks_values = defaultdict(default_blank,
ASSIGNED_FROM_GEO="No",
ASSIGNED_FROM_GEO="false",
COMMON_NAME="unclassified metagenome",
COUNTRY="USA",
ELEVATION='193.0',
Expand All @@ -502,7 +502,7 @@ def default_blank():
LATITUDE='32.8',
LONGITUDE='-117.2',
GEO_LOC_NAME='USA:CA:San Diego',
PUBLIC='Yes',
PUBLIC='true',
SAMPLE_TYPE='control blank',
SCIENTIFIC_NAME='metagenome',
STATE='CA',
Expand All @@ -512,9 +512,9 @@ def default_blank():
PHYSICAL_SPECIMEN_LOCATION='UCSDMI',
PHYSICAL_SPECIMEN_REMAINING='FALSE',
DESCRIPTION='American Gut control',
SUBSET_AGE=str(False),
SUBSET_DIABETES=str(False),
SUBSET_IBD=str(False),
SUBSET_ANTIBIOTIC_HISTORY=str(False),
SUBSET_BMI=str(False),
SUBSET_HEALTHY=str(False))
SUBSET_AGE='false',
SUBSET_DIABETES='false',
SUBSET_IBD='false',
SUBSET_ANTIBIOTIC_HISTORY='false',
SUBSET_BMI='false',
SUBSET_HEALTHY='false')
Loading