Skip to content

Commit

Permalink
split parser; cleaned main script; added entrypoint set to deepImpute
Browse files Browse the repository at this point in the history
  • Loading branch information
Puumanamana committed Feb 20, 2020
1 parent a3e1b10 commit 19b4d90
Show file tree
Hide file tree
Showing 10 changed files with 188 additions and 419 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
files = ./setup.py
commit = True
tag = True
current_version = 0.0.1
current_version = 1.1

1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
*#*
*.egg-info*
dist
*.pyc
.coverage
.vscode
Expand Down
3 changes: 1 addition & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,4 @@ MAINTAINER Breck Yunits <byunits@cc.hawaii.edu>

RUN apt-get update && apt-get install -y git

RUN git clone https://github.com/lanagarmire/deepimpute && cd deepimpute && pip install --user .

RUN git clone https://github.com/lanagarmire/deepimpute && cd deepimpute && pip install --user .
45 changes: 32 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,14 @@ These instructions will get you a copy of the project up and running on your loc

### Installing

To install DeepImpute, you only need to download the git repository at https://github.com/lanagarmire/deepimpute and install it using pip:
You can install DeepImpute's latest release using pip with the following command:

```bash
pip install deepimpute
```

To install the latest GitHub version, you can also clone this directory and
install it:

```bash
git clone https://github.com/lanagarmire/deepimpute
Expand All @@ -29,14 +36,15 @@ DeepImpute can be used either on the command line or as a Python package.
Command line:

```
usage: deepImpute.py [-h] [-o O] [--cores CORES] [--cell-axis {rows,columns}]
[--limit LIMIT] [--minVMR MINVMR] [--subset SUBSET]
[--learning-rate LEARNING_RATE] [--batch-size BATCH_SIZE]
[--max-epochs MAX_EPOCHS]
[--hidden-neurons HIDDEN_NEURONS]
[--dropout-rate DROPOUT_RATE]
[--output-neurons OUTPUT_NEURONS]
inputFile
usage: deepImpute [-h] [-o OUTPUT] [--cores CORES]
[--cell-axis {rows,columns}] [--limit LIMIT]
[--minVMR MINVMR] [--subset SUBSET]
[--learning-rate LEARNING_RATE] [--batch-size BATCH_SIZE]
[--max-epochs MAX_EPOCHS] [--hidden-neurons HIDDEN_NEURONS]
[--dropout-rate DROPOUT_RATE]
[--output-neurons OUTPUT_NEURONS] [--n_pred N_PRED]
[--policy POLICY]
inputFile
scRNA-seq data imputation using DeepImpute.
Expand All @@ -45,7 +53,8 @@ positional arguments:
optional arguments:
-h, --help show this help message and exit
-o O Path to output data counts. Default: ./
-o OUTPUT, --output OUTPUT
Path to output data counts. Default: ./imputed.csv
--cores CORES Number of cores. Default: all available cores
--cell-axis {rows,columns}
Cell dimension in the matrix. Default: rows
Expand All @@ -68,16 +77,26 @@ optional arguments:
Dropout rate for the hidden dropout layer (0<rate<1).
Default: 0.2
--output-neurons OUTPUT_NEURONS
Number of output neurons per sub-network. Default: 512```
Number of output neurons per sub-network. Default: 512
--n_pred N_PRED Number of predictors to consider. Consider using this
parameter if your RAM is limited or if you have a high
number of features. Default: All genes with nonzero
VMR
--policy POLICY Whether to restore positive values from the raw
dataset or keep the max between the imputed values and
the raw values. Choices are ['restore', 'max'].
Default: restore
```

Python package:

```python
from deepimpute.deepImpute import deepImpute
from deepimpute.multinet import MultiNet

data = pd.read_csv('examples/test.csv', index_col=0) # dimension = (cells x genes)
imputed = deepImpute(data, NN_lim='auto', n_cores=16, cell_subset=1)
model = MultiNet()
model.fit(data)
imputed = model.predict(data)
```

A more detailed usage of deepImpute's functionality is available in the iPython Notebook notebook_example.ipynb
Expand Down
133 changes: 21 additions & 112 deletions deepimpute/deepImpute.py
Original file line number Diff line number Diff line change
@@ -1,107 +1,17 @@
def deepImpute(
    data,
    NN_lim="auto",
    cell_subset=1,
    imputed_only=False,
    policy="restore",
    minVMR=0.5,
    n_pred=None,
    **NN_params
):
    """Impute dropout values in a scRNA-seq count matrix with DeepImpute.

    Args:
        data: expression matrix (cells x genes), e.g. a pandas DataFrame.
        NN_lim: genes to impute ("auto" selects by VMR threshold).
        cell_subset: fraction (0<x<1) or number (int) of cells used for training.
        imputed_only: if True, return only the imputed genes.
        policy: "restore" to restore positive raw values, or "max" to keep
            the max of imputed and raw values.
        minVMR: minimum variance-over-mean ratio; genes below it are discarded
            when NN_lim is "auto".
        n_pred: number of predictor genes to consider (None = all with nonzero VMR).
        **NN_params: extra keyword arguments forwarded to MultiNet.

    Returns:
        The imputed expression matrix, as produced by MultiNet.predict.
    """
    # Local import keeps module import cheap; the unused `import pandas as pd`
    # that was here previously has been removed.
    from deepimpute.multinet import MultiNet

    multi = MultiNet(**NN_params)
    multi.fit(data, NN_lim=NN_lim, cell_subset=cell_subset, minVMR=minVMR, n_pred=n_pred)
    return multi.predict(data, imputed_only=imputed_only, policy=policy)

if __name__ == "__main__":
import argparse
import pandas as pd
from deepimpute.parser import parse_args
from deepimpute.multinet import MultiNet

parser = argparse.ArgumentParser(
description="scRNA-seq data imputation using DeepImpute."
)
parser.add_argument("inputFile", type=str, help="Path to input data.")
parser.add_argument(
"-o",
type=str,
help="Path to output data counts. Default: ./",
)
parser.add_argument(
"--cores", type=int, default=-1, help="Number of cores. Default: all available cores"
)
parser.add_argument(
"--cell-axis",
type=str,
choices=["rows", "columns"],
default=0,
help="Cell dimension in the matrix. Default: rows",
)
parser.add_argument(
"--limit",
type=str,
default="auto",
help="Genes to impute (e.g. first 2000 genes). Default: auto",
)
parser.add_argument(
"--minVMR",
type=float,
default="0.5",
help="Min Variance over mean ratio for gene exclusion. Gene with a VMR below ${minVMR} are discarded. Used if --limit is set to 'auto'. Default: 0.5",
)
parser.add_argument(
"--subset",
type=float,
default=1,
help="Cell subset to speed up training. \
Either a ratio (0<x<1) or a cell number (int). Default: 1 (all)",
)
parser.add_argument(
"--learning-rate",
type=float,
default=0.0005,
help="Learning rate. Default: 0.0001"
)
parser.add_argument(
"--batch-size",
type=int,
default=64,
help="Batch size. Default: 64"
)
parser.add_argument(
"--max-epochs",
type=int,
default=300,
help="Maximum number of epochs. Default: 500"
)
parser.add_argument(
"--hidden-neurons",
type=int,
default=300,
help="Number of neurons in the hidden dense layer. Default: 256"
)
parser.add_argument(
"--dropout-rate",
type=float,
default=0.2,
help="Dropout rate for the hidden dropout layer (0<rate<1). Default: 0.2"
)
parser.add_argument(
"--output-neurons",
type=int,
default=512,
help="Number of output neurons per sub-network. Default: 512"
)
parser.add_argument(
"--n_pred",
type=int,
default=None,
help="Number of predictors to consider. Consider using this parameter if your RAM is limited or if you have a high number of features. Default: All genes with nonzero VMR"
)
def deepImpute(**kwargs):

args = parser.parse_args()
args = parse_args()

for key, value in kwargs.items():
setattr(args, key, value)

data = pd.read_csv(args.inputFile, index_col=0)

if args.cell_axis == "columns":
data = data.T

Expand All @@ -115,17 +25,16 @@ def deepImpute(
{"type": "dense", "activation": "relu", "neurons": args.hidden_neurons},
{"type": "dropout", "activation": "dropout", "rate": args.dropout_rate}]
}

imputed = deepImpute(
data,
NN_lim=args.limit,
cell_subset=args.subset,
minVMR=args.minVMR,
n_pred=args.n_pred,
**NN_params
)

if args.cell_axis == "columns":
imputed = imputed.T

imputed.to_csv(args.o)
multi = MultiNet(**NN_params)
multi.fit(data, NN_lim=args.limit, cell_subset=args.subset, minVMR=args.minVMR, n_pred=args.n_pred)

imputed = multi.predict(data, imputed_only=False, policy=args.policy)

if args.output is not None:
imputed.to_csv(args.output)
else:
return imputed

if __name__ == "__main__":
deepImpute()
96 changes: 96 additions & 0 deletions deepimpute/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import argparse

def parse_args():
    """Parse command-line arguments for the deepImpute entry point.

    Returns:
        argparse.Namespace with the attributes: inputFile, output, cores,
        cell_axis, limit, minVMR, subset, learning_rate, batch_size,
        max_epochs, hidden_neurons, dropout_rate, output_neurons, n_pred,
        policy.
    """
    parser = argparse.ArgumentParser(
        description="scRNA-seq data imputation using DeepImpute."
    )
    parser.add_argument("inputFile", type=str, help="Path to input data.")
    parser.add_argument(
        "-o", "--output",
        type=str,
        default="./imputed.csv",
        help="Path to output data counts. Default: ./imputed.csv",
    )
    parser.add_argument(
        "--cores", type=int, default=-1, help="Number of cores. Default: all available cores"
    )
    parser.add_argument(
        "--cell-axis",
        type=str,
        choices=["rows", "columns"],
        default="rows",
        help="Cell dimension in the matrix. Default: rows",
    )
    parser.add_argument(
        "--limit",
        type=str,
        default="auto",
        help="Genes to impute (e.g. first 2000 genes). Default: auto",
    )
    parser.add_argument(
        "--minVMR",
        type=float,
        # Was the string "0.5"; use a real float so the default matches the type.
        default=0.5,
        help="Min Variance over mean ratio for gene exclusion. Gene with a VMR below ${minVMR} are discarded. Used if --limit is set to 'auto'. Default: 0.5",
    )
    parser.add_argument(
        "--subset",
        type=float,
        default=1,
        help="Cell subset to speed up training. \
        Either a ratio (0<x<1) or a cell number (int). Default: 1 (all)",
    )
    parser.add_argument(
        "--learning-rate",
        type=float,
        default=0.0005,
        # Help text previously said 0.0001; keep it in sync with the default.
        help="Learning rate. Default: 0.0005"
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=64,
        help="Batch size. Default: 64"
    )
    parser.add_argument(
        "--max-epochs",
        type=int,
        default=300,
        # Help text previously said 500; keep it in sync with the default.
        help="Maximum number of epochs. Default: 300"
    )
    parser.add_argument(
        "--hidden-neurons",
        type=int,
        default=300,
        # Help text previously said 256; keep it in sync with the default.
        help="Number of neurons in the hidden dense layer. Default: 300"
    )
    parser.add_argument(
        "--dropout-rate",
        type=float,
        default=0.2,
        help="Dropout rate for the hidden dropout layer (0<rate<1). Default: 0.2"
    )
    parser.add_argument(
        "--output-neurons",
        type=int,
        default=512,
        help="Number of output neurons per sub-network. Default: 512"
    )
    parser.add_argument(
        "--n_pred",
        type=int,
        default=None,
        help="Number of predictors to consider. Consider using this parameter if your RAM is limited or if you have a high number of features. Default: All genes with nonzero VMR"
    )
    parser.add_argument(
        "--policy",
        type=str,
        # Enforce the documented set of values instead of only mentioning it.
        choices=["restore", "max"],
        default='restore',
        help="Whether to restore positive values from the raw dataset or keep the max between the imputed values and the raw values. Choices are ['restore', 'max']. Default: restore"
    )

    args = parser.parse_args()

    return args

Loading

0 comments on commit 19b4d90

Please sign in to comment.