Skip to content

Commit

Permalink
split parser; cleaned main script; added entrypoint set to deepImpute
Browse files Browse the repository at this point in the history
  • Loading branch information
Puumanamana committed Feb 20, 2020
1 parent a3e1b10 commit 19b4d90
Show file tree
Hide file tree
Showing 10 changed files with 188 additions and 419 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
files = ./setup.py
commit = True
tag = True
current_version = 0.0.1
current_version = 1.1

1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
*#*
*.egg-info*
dist
*.pyc
.coverage
.vscode
Expand Down
3 changes: 1 addition & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,4 @@ MAINTAINER Breck Yunits <byunits@cc.hawaii.edu>

RUN apt-get update && apt-get install -y git

RUN git clone https://github.com/lanagarmire/deepimpute && cd deepimpute && pip install --user .

RUN git clone https://github.com/lanagarmire/deepimpute && cd deepimpute && pip install --user .
45 changes: 32 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,14 @@ These instructions will get you a copy of the project up and running on your loc

### Installing

To install DeepImpute, you only need to download the git repository at https://github.com/lanagarmire/deepimpute and install it using pip:
You can install DeepImpute's latest release using pip with the following command:

```bash
pip install deepimpute
```

To install the latest GitHub version, you can also clone this directory and
install it:

```bash
git clone https://github.com/lanagarmire/deepimpute
Expand All @@ -29,14 +36,15 @@ DeepImpute can be used either on the command line or as a Python package.
Command line:

```
usage: deepImpute.py [-h] [-o O] [--cores CORES] [--cell-axis {rows,columns}]
[--limit LIMIT] [--minVMR MINVMR] [--subset SUBSET]
[--learning-rate LEARNING_RATE] [--batch-size BATCH_SIZE]
[--max-epochs MAX_EPOCHS]
[--hidden-neurons HIDDEN_NEURONS]
[--dropout-rate DROPOUT_RATE]
[--output-neurons OUTPUT_NEURONS]
inputFile
usage: deepImpute [-h] [-o OUTPUT] [--cores CORES]
[--cell-axis {rows,columns}] [--limit LIMIT]
[--minVMR MINVMR] [--subset SUBSET]
[--learning-rate LEARNING_RATE] [--batch-size BATCH_SIZE]
[--max-epochs MAX_EPOCHS] [--hidden-neurons HIDDEN_NEURONS]
[--dropout-rate DROPOUT_RATE]
[--output-neurons OUTPUT_NEURONS] [--n_pred N_PRED]
[--policy POLICY]
inputFile
scRNA-seq data imputation using DeepImpute.
Expand All @@ -45,7 +53,8 @@ positional arguments:
optional arguments:
-h, --help show this help message and exit
-o O Path to output data counts. Default: ./
-o OUTPUT, --output OUTPUT
Path to output data counts. Default: ./imputed.csv
--cores CORES Number of cores. Default: all available cores
--cell-axis {rows,columns}
Cell dimension in the matrix. Default: rows
Expand All @@ -68,16 +77,26 @@ optional arguments:
Dropout rate for the hidden dropout layer (0<rate<1).
Default: 0.2
--output-neurons OUTPUT_NEURONS
Number of output neurons per sub-network. Default: 512```
Number of output neurons per sub-network. Default: 512
--n_pred N_PRED Number of predictors to consider. Consider using this
parameter if your RAM is limited or if you have a high
number of features. Default: All genes with nonzero
VMR
--policy POLICY Whether to restore positive values from the raw
dataset or keep the max between the imputed values and
the raw values. Choices are ['restore', 'max'].
Default: restore
```

Python package:

```python
from deepimpute.deepImpute import deepImpute
from deepimpute.multinet import MultiNet

data = pd.read_csv('examples/test.csv', index_col=0) # dimension = (cells x genes)
imputed = deepImpute(data, NN_lim='auto', n_cores=16, cell_subset=1)
model = MultiNet()
model.fit(data)
imputed = model.predict(data)
```

A more detailed usage of deepImpute's functionality is available in the iPython Notebook notebook_example.ipynb
Expand Down
133 changes: 21 additions & 112 deletions deepimpute/deepImpute.py
Original file line number Diff line number Diff line change
@@ -1,107 +1,17 @@
def deepImpute(
    data,
    NN_lim="auto",
    cell_subset=1,
    imputed_only=False,
    policy="restore",
    minVMR=0.5,
    n_pred=None,
    **NN_params
):
    """Impute dropout values in a scRNA-seq count matrix with DeepImpute.

    Args:
        data: expression matrix (cells x genes), e.g. a pandas DataFrame.
        NN_lim: genes to impute ("auto" selects by VMR threshold).
        cell_subset: fraction (0<x<1) or number (int) of cells used for training.
        imputed_only: if True, return only the imputed genes.
        policy: "restore" to restore positive raw values, or "max" to keep
            the max of imputed and raw values.
        minVMR: minimum variance-over-mean ratio; genes below it are discarded
            when NN_lim is "auto".
        n_pred: number of predictor genes to consider (None = all with nonzero VMR).
        **NN_params: extra keyword arguments forwarded to MultiNet.

    Returns:
        The imputed expression matrix, as produced by MultiNet.predict.
    """
    # Local import keeps module import cheap; the unused `import pandas as pd`
    # that was here previously has been removed.
    from deepimpute.multinet import MultiNet

    multi = MultiNet(**NN_params)
    multi.fit(data, NN_lim=NN_lim, cell_subset=cell_subset, minVMR=minVMR, n_pred=n_pred)
    return multi.predict(data, imputed_only=imputed_only, policy=policy)

if __name__ == "__main__":
import argparse
import pandas as pd
from deepimpute.parser import parse_args
from deepimpute.multinet import MultiNet

parser = argparse.ArgumentParser(
description="scRNA-seq data imputation using DeepImpute."
)
parser.add_argument("inputFile", type=str, help="Path to input data.")
parser.add_argument(
"-o",
type=str,
help="Path to output data counts. Default: ./",
)
parser.add_argument(
"--cores", type=int, default=-1, help="Number of cores. Default: all available cores"
)
parser.add_argument(
"--cell-axis",
type=str,
choices=["rows", "columns"],
default=0,
help="Cell dimension in the matrix. Default: rows",
)
parser.add_argument(
"--limit",
type=str,
default="auto",
help="Genes to impute (e.g. first 2000 genes). Default: auto",
)
parser.add_argument(
"--minVMR",
type=float,
default="0.5",
help="Min Variance over mean ratio for gene exclusion. Gene with a VMR below ${minVMR} are discarded. Used if --limit is set to 'auto'. Default: 0.5",
)
parser.add_argument(
"--subset",
type=float,
default=1,
help="Cell subset to speed up training. \
Either a ratio (0<x<1) or a cell number (int). Default: 1 (all)",
)
parser.add_argument(
"--learning-rate",
type=float,
default=0.0005,
help="Learning rate. Default: 0.0001"
)
parser.add_argument(
"--batch-size",
type=int,
default=64,
help="Batch size. Default: 64"
)
parser.add_argument(
"--max-epochs",
type=int,
default=300,
help="Maximum number of epochs. Default: 500"
)
parser.add_argument(
"--hidden-neurons",
type=int,
default=300,
help="Number of neurons in the hidden dense layer. Default: 256"
)
parser.add_argument(
"--dropout-rate",
type=float,
default=0.2,
help="Dropout rate for the hidden dropout layer (0<rate<1). Default: 0.2"
)
parser.add_argument(
"--output-neurons",
type=int,
default=512,
help="Number of output neurons per sub-network. Default: 512"
)
parser.add_argument(
"--n_pred",
type=int,
default=None,
help="Number of predictors to consider. Consider using this parameter if your RAM is limited or if you have a high number of features. Default: All genes with nonzero VMR"
)
def deepImpute(**kwargs):

args = parser.parse_args()
args = parse_args()

for key, value in kwargs.items():
setattr(args, key, value)

data = pd.read_csv(args.inputFile, index_col=0)

if args.cell_axis == "columns":
data = data.T

Expand All @@ -115,17 +25,16 @@ def deepImpute(
{"type": "dense", "activation": "relu", "neurons": args.hidden_neurons},
{"type": "dropout", "activation": "dropout", "rate": args.dropout_rate}]
}

imputed = deepImpute(
data,
NN_lim=args.limit,
cell_subset=args.subset,
minVMR=args.minVMR,
n_pred=args.n_pred,
**NN_params
)

if args.cell_axis == "columns":
imputed = imputed.T

imputed.to_csv(args.o)
multi = MultiNet(**NN_params)
multi.fit(data, NN_lim=args.limit, cell_subset=args.subset, minVMR=args.minVMR, n_pred=args.n_pred)

imputed = multi.predict(data, imputed_only=False, policy=args.policy)

if args.output is not None:
imputed.to_csv(args.output)
else:
return imputed

if __name__ == "__main__":
deepImpute()
96 changes: 96 additions & 0 deletions deepimpute/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import argparse

def parse_args():
    """Parse command-line arguments for the deepImpute entry point.

    Returns:
        argparse.Namespace with the attributes: inputFile, output, cores,
        cell_axis, limit, minVMR, subset, learning_rate, batch_size,
        max_epochs, hidden_neurons, dropout_rate, output_neurons, n_pred,
        policy.
    """
    parser = argparse.ArgumentParser(
        description="scRNA-seq data imputation using DeepImpute."
    )
    parser.add_argument("inputFile", type=str, help="Path to input data.")
    parser.add_argument(
        "-o", "--output",
        type=str,
        default="./imputed.csv",
        help="Path to output data counts. Default: ./imputed.csv",
    )
    parser.add_argument(
        "--cores", type=int, default=-1, help="Number of cores. Default: all available cores"
    )
    parser.add_argument(
        "--cell-axis",
        type=str,
        choices=["rows", "columns"],
        default="rows",
        help="Cell dimension in the matrix. Default: rows",
    )
    parser.add_argument(
        "--limit",
        type=str,
        default="auto",
        help="Genes to impute (e.g. first 2000 genes). Default: auto",
    )
    parser.add_argument(
        "--minVMR",
        type=float,
        # Was the string "0.5"; use a real float so the default matches the type.
        default=0.5,
        help="Min Variance over mean ratio for gene exclusion. Gene with a VMR below ${minVMR} are discarded. Used if --limit is set to 'auto'. Default: 0.5",
    )
    parser.add_argument(
        "--subset",
        type=float,
        default=1,
        help="Cell subset to speed up training. \
        Either a ratio (0<x<1) or a cell number (int). Default: 1 (all)",
    )
    parser.add_argument(
        "--learning-rate",
        type=float,
        default=0.0005,
        # Help text previously said 0.0001; keep it in sync with the default.
        help="Learning rate. Default: 0.0005"
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=64,
        help="Batch size. Default: 64"
    )
    parser.add_argument(
        "--max-epochs",
        type=int,
        default=300,
        # Help text previously said 500; keep it in sync with the default.
        help="Maximum number of epochs. Default: 300"
    )
    parser.add_argument(
        "--hidden-neurons",
        type=int,
        default=300,
        # Help text previously said 256; keep it in sync with the default.
        help="Number of neurons in the hidden dense layer. Default: 300"
    )
    parser.add_argument(
        "--dropout-rate",
        type=float,
        default=0.2,
        help="Dropout rate for the hidden dropout layer (0<rate<1). Default: 0.2"
    )
    parser.add_argument(
        "--output-neurons",
        type=int,
        default=512,
        help="Number of output neurons per sub-network. Default: 512"
    )
    parser.add_argument(
        "--n_pred",
        type=int,
        default=None,
        help="Number of predictors to consider. Consider using this parameter if your RAM is limited or if you have a high number of features. Default: All genes with nonzero VMR"
    )
    parser.add_argument(
        "--policy",
        type=str,
        # Enforce the documented set of values instead of only mentioning it.
        choices=["restore", "max"],
        default='restore',
        help="Whether to restore positive values from the raw dataset or keep the max between the imputed values and the raw values. Choices are ['restore', 'max']. Default: restore"
    )

    args = parser.parse_args()

    return args

Loading

0 comments on commit 19b4d90

Please sign in to comment.