diff --git a/inference/Dockerfile b/inference/Dockerfile deleted file mode 100644 index d7a6e5e..0000000 --- a/inference/Dockerfile +++ /dev/null @@ -1,55 +0,0 @@ -FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04 -MAINTAINER Li Shen - -# ============== OpenCV2, numpy, scipy =================== # -RUN echo "deb http://us.archive.ubuntu.com/ubuntu xenial main multiverse" >> /etc/apt/sources.list -ENV TERM xterm-256color # just to turn off some warnings. -RUN apt-get -y update && \ - apt-get install -y --no-install-recommends python-opencv python-numpy python-scipy && \ - apt-get autoclean && \ - apt-get autoremove -RUN python -c "import cv2; print cv2.__version__" - -# =======================Parallel & convert ==================== # -RUN apt-get -y update && \ - apt-get install -y --no-install-recommends parallel imagemagick && \ - apt-get autoclean && \ - apt-get autoremove - -# ===================== Tensorflow ======================# -ENV PATH /usr/local/cuda/bin${PATH:+:${PATH}} -ENV LD_LIBRARY_PATH /usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} -RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nvidia-367-dev && \ - apt-get autoclean && \ - apt-get autoremove -RUN apt-get install -y --no-install-recommends python-pip python-dev && \ - pip install -U pip && \ - apt-get autoclean && \ - apt-get autoremove -RUN pip install -U tensorflow-gpu - -# ====================== Sklearn ========================# -RUN pip install -U scikit-learn -RUN python -c "import sklearn; print sklearn.__version__" - -# ====================== Keras ==========================# -RUN pip install -U pyyaml six h5py pydot-ng -WORKDIR / -RUN echo "Keras last updated: 2016-12-22" -RUN apt-get install -y --no-install-recommends git && \ - git clone https://github.com/lishen/keras.git -WORKDIR keras -RUN git checkout fix-1-channel-samplewise-std -RUN python setup.py install -RUN python -c "import keras; print keras.__version__" - -# ============================================== # -# Copy processing and training files: -RUN pip install -U pandas pydicom -WORKDIR / -# COPY train.sh train_small.sh ./ -COPY sc1_infer.sh ./ -COPY temp/*.py dm_sc1_infer.py ./ -COPY temp/resnet50_288_bestAuc_model.h5 ./ -# COPY ./modelState/2017-01-15_resnet50_288_4/resnet50_288_bestAuc_model_4.h5 ./ -# VOLUME ["/metadata", "/trainingData", "/preprocessedData", "/modelState", "/scratch"] diff --git a/inference/INSTRUCTIONS b/inference/INSTRUCTIONS deleted file mode 100644 index b10011c..0000000 --- a/inference/INSTRUCTIONS +++ /dev/null @@ -1,20 +0,0 @@ -The predictions must be saved to /output/predictions.tsv and have the following TSV format: - -subjectId laterality confidence -1 L 0.01 -1 R 0.05 -2 L 0.00 -2 R 0.01 - -First, the predictions file must include the above header, as the scoring script refers to the columns by name. The first column contains the ID of the subject and the second column the laterality of the breast (left or right). The third column contains the confidence level predicted by the inference method that a breast will develop cancer within 12 months. The confidence level must take values in [0,1], where 0 means that the breast will not develop cancer and 1 means that the breast is certain to develop cancer. - -Below are all the requirements that a prediction file must follow for it to be scored.
-* Predictions MUST be a TSV file and MUST be named predictions.tsv -* Prediction file MUST have the headers (case sensitive): subjectId, laterality, confidence -* A sample is considered as subjectId + laterality (Ex. 0001R, 0001L) -* Laterality MUST be either R, or L. Please do not use lowercase letters or right/left. -* No duplicated samples allowed -* All samples in the prediction file MUST exist in the goldstandard -* Confidence values cannot be negative -* Must at least have one confidence value (Can't submit all NA or blank file) -* Empty confidence/missing samples will be imputed with the median of confidence values diff --git a/inference/calcAUC.R b/inference/calcAUC.R deleted file mode 100644 index f634291..0000000 --- a/inference/calcAUC.R +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env Rscript - -# if (!require(pROC)) { -# install.packages("pROC") -# } - -suppressMessages(library(pROC)) -## These functions assume that the gold standard data and the predictions -## have already been matched - -## computes AUC and partial AUC focusing on sensitivity -## -#Assume label and prediction are matched -GetScores <- function(label, prediction, sensitivityRange = c(0.8, 1)) { - roc1 <- roc(label, prediction, direction = "<") - AUC <- auc(roc1)[1] - pAUCse <- auc(roc1, partial.auc = sensitivityRange, - partial.auc.focus = "sensitivity", - partial.auc.correct = FALSE)[1] - SpecAtSens <- coords(roc1, sensitivityRange[1], input = "sensitivity", - ret = "specificity") - list(AUC = AUC, pAUCse = pAUCse, SpecAtSens = SpecAtSens) -} - -## -args <- commandArgs(T) -pred.tbl <- read.table(args[1], sep="\t", header=T) -if(sum(pred.tbl$target == 1) == 0) { - cat("NO cancer cases, AUROC score was not calculated.\n") -} else { - scores = GetScores(pred.tbl$target, pred.tbl$confidence) - cat("==========================================\n") - cat(sprintf("AUC=%.4f, pAUC=%.4f, SpecAtSens=%.4f\n", - scores$AUC, scores$pAUCse, scores$SpecAtSens)) - cat("==========================================\n") -} - diff --git a/inference/dm_sc1_candidROI_infer.py b/inference/dm_sc1_candidROI_infer.py deleted file mode 100644 index 1747e50..0000000 --- a/inference/dm_sc1_candidROI_infer.py +++ /dev/null @@ -1,226 +0,0 @@ -import argparse, os -import numpy as np -from sklearn.model_selection import train_test_split -from keras.models import load_model -from meta import DMMetaManager -from dm_image import DMImageDataGenerator -import dm_inference as dminfer -from dm_keras_ext import DMMetrics -from dm_multi_gpu import make_parallel - -import warnings -import exceptions -warnings.filterwarnings('ignore', category=exceptions.UserWarning) - - -def run(img_folder, img_height=1024, img_scale=4095, - roi_per_img=32, roi_size=(256, 256), - low_int_threshold=.05, blob_min_area=3, - blob_min_int=.5, blob_max_int=.85, blob_th_step=10, - roi_state=None, roi_bs=32, - do_featurewise_norm=True, featurewise_mean=884.7, featurewise_std=745.3, - img_tsv='./metadata/images_crosswalk_prediction.tsv', exam_tsv=None, - dl_state=None, dl_bs=32, nb_top_avg=1, validation_mode=False, - val_size=None, img_voting=False, - out_pred='./output/predictions.tsv'): - '''Run SC1 inference using the candidate ROI approach - Notes: - "mean=884.7, std=745.3" are estimated from 20 subjects on the - training data. - ''' - - # Read some env variables. - random_seed = int(os.getenv('RANDOM_SEED', 12345)) - # nb_worker = int(os.getenv('NUM_CPU_CORES', 4)) - gpu_count = int(os.getenv('NUM_GPU_DEVICES', 1)) - - # Setup data generator for inference. 
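Note: flow_from_candid_roi (used further down) is defined in dm_image, which is not part of this diff, so the exact candidate-ROI detection is not visible here. Judging from the blob_* arguments in run() above, candidates are presumably seeded by multi-threshold blob detection on the breast region. A minimal sketch with the OpenCV 3.x API, assuming the intensity cutoffs are fractions of the 8-bit range:

import cv2

def make_blob_detector(blob_min_area=3, blob_min_int=.5,
                       blob_max_int=.85, blob_th_step=10):
    params = cv2.SimpleBlobDetector_Params()
    params.filterByArea = True
    params.minArea = blob_min_area
    params.minThreshold = blob_min_int * 255   # assumed 8-bit scaling
    params.maxThreshold = blob_max_int * 255
    params.thresholdStep = blob_th_step
    params.filterByColor = True
    params.blobColor = 255                     # bright blobs on dark tissue
    return cv2.SimpleBlobDetector_create(params)

# keypoints = make_blob_detector().detect(img_uint8)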
- meta_man = DMMetaManager( - img_tsv=img_tsv, exam_tsv=exam_tsv, img_folder=img_folder, - img_extension='dcm') - if val_size is not None: # Use a subset for validation. - subj_list, subj_labs = meta_man.get_subj_labs() - _, subj_test = train_test_split( - subj_list, test_size=val_size, random_state=random_seed, - stratify=subj_labs) - else: - subj_test = None - - if validation_mode: - exam_list = meta_man.get_flatten_exam_list(subj_list=subj_test, - flatten_img_list=True) - else: - exam_list = meta_man.get_last_exam_list(subj_list=subj_test, - flatten_img_list=True) - - if do_featurewise_norm: - img_gen = DMImageDataGenerator(featurewise_center=True, - featurewise_std_normalization=True) - img_gen.mean = featurewise_mean - img_gen.std = featurewise_std - else: - img_gen = DMImageDataGenerator(samplewise_center=True, - samplewise_std_normalization=True) - if validation_mode: - class_mode = 'categorical' - else: - class_mode = None - - # Load ROI classifier. - if roi_state is not None: - roi_clf = load_model( - roi_state, - custom_objects={ - 'sensitivity': DMMetrics.sensitivity, - 'specificity': DMMetrics.specificity - } - ) - if gpu_count > 1: - roi_clf = make_parallel(roi_clf, gpu_count) - else: - roi_clf = None - - # Load model. - if dl_state is not None: - model = load_model(dl_state) - else: - raise Exception('At least one model state must be specified.') - if gpu_count > 1: - model = make_parallel(model, gpu_count) - - # A function to make predictions on image patches from an image list. - def pred_img_list(img_list): - roi_generator = img_gen.flow_from_candid_roi( - img_list, target_height=img_height, target_scale=img_scale, - class_mode=class_mode, validation_mode=True, - img_per_batch=len(img_list), roi_per_img=roi_per_img, - roi_size=roi_size, - low_int_threshold=low_int_threshold, blob_min_area=blob_min_area, - blob_min_int=blob_min_int, blob_max_int=blob_max_int, - blob_th_step=blob_th_step, - roi_clf=roi_clf, clf_bs=roi_bs, return_sample_weight=True, - seed=random_seed) - roi_dat, roi_w = roi_generator.next() - # import pdb; pdb.set_trace() - pred = model.predict(roi_dat, batch_size=dl_bs) - pred = pred[:, 1] # cancer class predictions. - if roi_clf is not None: - # return np.average(pred, weights=roi_w) - # import pdb; pdb.set_trace() - return pred[np.argsort(roi_w)[-nb_top_avg:]].mean() - elif img_voting: - pred = pred.reshape((-1, roi_per_img)) - img_preds = [ np.sort(row)[-nb_top_avg:].mean() for row in pred ] - return np.mean(img_preds) - else: - return np.sort(pred)[-nb_top_avg:].mean() - - - # Print header. 
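The writer below emits the predictions.tsv described in inference/INSTRUCTIONS. A quick pandas sanity check against the submission rules listed there might look like this (a hypothetical helper, not part of the repo):

import pandas as pd

def check_predictions(path='./output/predictions.tsv'):
    df = pd.read_csv(path, sep='\t')
    assert list(df.columns) == ['subjectId', 'laterality', 'confidence']
    assert df['laterality'].isin(['L', 'R']).all()
    assert not df.duplicated(['subjectId', 'laterality']).any()
    assert df['confidence'].notnull().any()   # at least one non-NA value
    conf = df['confidence'].dropna()
    assert (conf >= 0).all() and (conf <= 1).all()
    # the scorer imputes missing confidences with the median
    return df.fillna({'confidence': df['confidence'].median()})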
- fout = open(out_pred, 'w') - if validation_mode: - fout.write(dminfer.INFER_HEADER_VAL) - else: - fout.write(dminfer.INFER_HEADER) - - for subj, exidx, exam in exam_list: - try: - predL = pred_img_list(exam['L']['img']) - except KeyError: - predL = .0 - try: - predR = pred_img_list(exam['R']['img']) - except KeyError: - predR = .0 - - try: - cancerL = int(exam['L']['cancer']) - except ValueError: - cancerL = 0 - try: - cancerR = int(exam['R']['cancer']) - except ValueError: - cancerR = 0 - - if validation_mode: - fout.write("%s\t%s\tL\t%f\t%d\n" % \ - (str(subj), str(exidx), predL, cancerL)) - fout.write("%s\t%s\tR\t%f\t%d\n" % \ - (str(subj), str(exidx), predR, cancerR)) - else: - fout.write("%s\tL\t%f\n" % (str(subj), predL)) - fout.write("%s\tR\t%f\n" % (str(subj), predR)) - - fout.close() - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description="DM SC1 inference") - parser.add_argument("img_folder", type=str) - parser.add_argument("--img-height", "-ih", dest="img_height", type=int, default=1024) - parser.add_argument("--img-scale", "-is", dest="img_scale", type=int, default=4095) - parser.add_argument("--roi-per-img", "-rpi", dest="roi_per_img", type=int, default=32) - parser.add_argument("--roi-size", "-rs", dest="roi_size", nargs=2, type=int, default=[256, 256]) - parser.add_argument("--low-int-threshold", dest="low_int_threshold", type=float, default=.05) - parser.add_argument("--blob-min-area", dest="blob_min_area", type=int, default=3) - parser.add_argument("--blob-min-int", dest="blob_min_int", type=float, default=.5) - parser.add_argument("--blob-max-int", dest="blob_max_int", type=float, default=.85) - parser.add_argument("--blob-th-step", dest="blob_th_step", type=int, default=10) - parser.add_argument("--roi-state", dest="roi_state", type=str, default=None) - parser.add_argument("--no-roi-state", dest="roi_state", action="store_const", const=None) - parser.add_argument("--roi-bs", dest="roi_bs", type=int, default=32) - parser.add_argument("--featurewise-norm", dest="do_featurewise_norm", action="store_true") - parser.add_argument("--no-featurewise-norm", dest="do_featurewise_norm", action="store_false") - parser.set_defaults(do_featurewise_norm=True) - parser.add_argument("--featurewise-mean", "-feam", dest="featurewise_mean", type=float, default=884.7) - parser.add_argument("--featurewise-std", "-feas", dest="featurewise_std", type=float, default=745.3) - parser.add_argument("--img-tsv", "-it", dest="img_tsv", type=str, - default="./metadata/images_crosswalk.tsv") - parser.add_argument("--exam-tsv", "-et", dest="exam_tsv", type=str) - parser.add_argument("--no-exam-tsv", dest="exam_tsv", action="store_const", const=None) - parser.set_defaults(exam_tsv=None) - parser.add_argument("--dl-state", "-ds", dest="dl_state", type=str) - parser.add_argument("--dl-bs", "-bs", dest="dl_bs", type=int, default=32) - parser.add_argument("--nb-top-avg", dest="nb_top_avg", type=int, default=1) - parser.add_argument("--validation-mode", dest="validation_mode", action="store_true") - parser.add_argument("--no-validation-mode", dest="validation_mode", action="store_false") - parser.set_defaults(validation_mode=False) - parser.add_argument("--val-size", dest="val_size", type=float, default=None) - parser.add_argument("--no-val-size", dest="val_size", action="store_const", const=None) - parser.add_argument("--img-voting", dest="img_voting", action="store_true") - parser.add_argument("--no-img-voting", dest="img_voting", action="store_false") - 
parser.set_defaults(img_voting=False) - parser.add_argument("--out-pred", "-o", dest="out_pred", type=str, - default="./output/predictions.tsv") - - args = parser.parse_args() - run_opts = dict( - img_height=args.img_height, - img_scale=args.img_scale, - roi_per_img=args.roi_per_img, - roi_size=tuple(args.roi_size), - low_int_threshold=args.low_int_threshold, - blob_min_area=args.blob_min_area, - blob_min_int=args.blob_min_int, - blob_max_int=args.blob_max_int, - blob_th_step=args.blob_th_step, - roi_state=args.roi_state, - roi_bs=args.roi_bs, - do_featurewise_norm=args.do_featurewise_norm, - featurewise_mean=args.featurewise_mean, - featurewise_std=args.featurewise_std, - img_tsv=args.img_tsv, - exam_tsv=args.exam_tsv, - dl_state=args.dl_state, - dl_bs=args.dl_bs, - nb_top_avg=args.nb_top_avg, - validation_mode=args.validation_mode, - val_size=(args.val_size if args.val_size is None or args.val_size < 1. - else int(args.val_size)), - img_voting=args.img_voting, - out_pred=args.out_pred - ) - print "\n>>> Inference options: <<<\n", run_opts, "\n" - run(args.img_folder, **run_opts) - diff --git a/inference/dm_sc1_infer.py b/inference/dm_sc1_infer.py deleted file mode 100644 index eb3ae88..0000000 --- a/inference/dm_sc1_infer.py +++ /dev/null @@ -1,148 +0,0 @@ -import argparse -import numpy as np -from keras.models import load_model -from meta import DMMetaManager -from dm_image import DMImageDataGenerator -from dm_enet import MultiViewDLElasticNet -import dm_inference as dminfer - -import warnings -import exceptions -warnings.filterwarnings('ignore', category=exceptions.UserWarning) - - -def run(img_folder, img_size=[288, 224], do_featurewise_norm=True, - featurewise_mean=485.9, featurewise_std=765.2, batch_size=16, - img_tsv='./metadata/images_crosswalk_prediction.tsv', - exam_tsv=None, - dl_state=None, - enet_state=None, - validation_mode=False, use_mean=False, - out_pred='./output/predictions.tsv'): - '''Run SC1 inference - Args: - featurewise_mean, featurewise_std ([float]): they are estimated from - 1152 x 896 images. Using different sized images give very close - results. For png, mean=7772, std=12187. - ''' - - # Setup data generator for inference. - meta_man = DMMetaManager( - img_tsv=img_tsv, exam_tsv=exam_tsv, img_folder=img_folder, - img_extension='dcm') - if validation_mode: - exam_list = meta_man.get_flatten_exam_list() - else: - exam_list = meta_man.get_last_exam_list() - if do_featurewise_norm: - img_gen = DMImageDataGenerator(featurewise_center=True, - featurewise_std_normalization=True) - img_gen.mean = featurewise_mean - img_gen.std = featurewise_std - else: - img_gen = DMImageDataGenerator(samplewise_center=True, - samplewise_std_normalization=True) - if validation_mode: - class_mode = 'binary' - else: - class_mode = None - datgen_exam = img_gen.flow_from_exam_list( - exam_list, target_size=(img_size[0], img_size[1]), - class_mode=class_mode, prediction_mode=True, batch_size=batch_size) - - if enet_state is not None: - model = MultiViewDLElasticNet(*enet_state) - elif dl_state is not None: - model = load_model(dl_state) - else: - raise Exception('At least one model state must be specified.') - exams_seen = 0 - fout = open(out_pred, 'w') - - # Print header. 
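The loop below scores each breast by combining its CC and MLO views through dminfer.pred_2view_img_list. dm_inference is not included in this diff; a plausible reading of the call is that every image in each view is scored and the scores are pooled by mean or max, roughly:

import numpy as np

def pred_2view_sketch(cc_imgs, mlo_imgs, model, use_mean=False):
    # Assumption: the model emits one malignancy score per image.
    cc_scores = model.predict(np.stack(cc_imgs)).ravel()
    mlo_scores = model.predict(np.stack(mlo_imgs)).ravel()
    pool = np.mean if use_mean else np.max
    return pool(np.concatenate([cc_scores, mlo_scores]))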
- if validation_mode: - fout.write(dminfer.INFER_HEADER_VAL) - else: - fout.write(dminfer.INFER_HEADER) - - while exams_seen < len(exam_list): - ebat = next(datgen_exam) - if class_mode is not None: - bat_x = ebat[0] - bat_y = ebat[1] - else: - bat_x = ebat - subj_batch = bat_x[0] - exam_batch = bat_x[1] - cc_batch = bat_x[2] - mlo_batch = bat_x[3] - for i, subj in enumerate(subj_batch): - exam = exam_batch[i] - li = i*2 # left breast index. - ri = i*2 + 1 # right breast index. - left_pred = dminfer.pred_2view_img_list( - cc_batch[li], mlo_batch[li], model, use_mean) - right_pred = dminfer.pred_2view_img_list( - cc_batch[ri], mlo_batch[ri], model, use_mean) - if validation_mode: - fout.write("%s\t%s\tL\t%f\t%f\n" % \ - (str(subj), str(exam), left_pred, bat_y[li])) - fout.write("%s\t%s\tR\t%f\t%f\n" % \ - (str(subj), str(exam), right_pred, bat_y[ri])) - else: - fout.write("%s\tL\t%f\n" % (str(subj), left_pred)) - fout.write("%s\tR\t%f\n" % (str(subj), right_pred)) - - exams_seen += len(subj_batch) - - fout.close() - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description="DM SC1 inference") - parser.add_argument("img_folder", type=str) - parser.add_argument("--img-size", "-is", dest="img_size", nargs=2, type=int, - default=[288, 224]) - parser.add_argument("--featurewise-norm", dest="do_featurewise_norm", action="store_true") - parser.add_argument("--no-featurewise-norm", dest="do_featurewise_norm", action="store_false") - parser.set_defaults(do_featurewise_norm=True) - parser.add_argument("--featurewise-mean", "-feam", dest="featurewise_mean", - type=float, default=485.9) - parser.add_argument("--featurewise-std", "-feas", dest="featurewise_std", - type=float, default=765.2) - parser.add_argument("--batch-size", "-bs", dest="batch_size", type=int, default=16) - parser.add_argument("--img-tsv", "-it", dest="img_tsv", type=str, - default="./metadata/images_crosswalk.tsv") - parser.add_argument("--exam-tsv", "-et", dest="exam_tsv", type=str) - parser.add_argument("--no-exam-tsv", dest="exam_tsv", action="store_const", const=None) - parser.set_defaults(exam_tsv=None) - parser.add_argument("--dl-state", "-ds", dest="dl_state", type=str) - parser.add_argument("--enet-state", "-es", dest="enet_state", nargs=2, type=str) - parser.add_argument("--validation-mode", dest="validation_mode", action="store_true") - parser.add_argument("--no-validation-mode", dest="validation_mode", action="store_false") - parser.set_defaults(validation_mode=False) - parser.add_argument("--use-mean", dest="use_mean", action="store_true") - parser.add_argument("--no-use-mean", dest="use_mean", action="store_false") - parser.set_defaults(use_mean=False) - parser.add_argument("--out-pred", "-o", dest="out_pred", type=str, - default="./output/predictions.tsv") - - args = parser.parse_args() - run_opts = dict( - img_size=args.img_size, - do_featurewise_norm=args.do_featurewise_norm, - featurewise_mean=args.featurewise_mean, - featurewise_std=args.featurewise_std, - batch_size=args.batch_size, - img_tsv=args.img_tsv, - exam_tsv=args.exam_tsv, - dl_state=args.dl_state, - enet_state=args.enet_state, - validation_mode=args.validation_mode, - use_mean=args.use_mean, - out_pred=args.out_pred - ) - print "\n>>> Inference options: <<<\n", run_opts, "\n" - run(args.img_folder, **run_opts) - diff --git a/inference/dm_sc1_phm_infer.py b/inference/dm_sc1_phm_infer.py deleted file mode 100644 index 2cdd275..0000000 --- a/inference/dm_sc1_phm_infer.py +++ /dev/null @@ -1,231 +0,0 @@ -import argparse, os, sys, pickle 
-import numpy as np -import pandas as pd -from keras.models import load_model -from meta import DMMetaManager -from dm_image import get_prob_heatmap -import dm_inference as dminfer -from dm_region import prob_heatmap_features -from dm_multi_gpu import make_parallel -import warnings -import exceptions -warnings.filterwarnings('ignore', category=exceptions.UserWarning) - - -def run(img_folder, dl_state, clf_info_state, img_extension='dcm', - img_height=4096, img_scale=255., - equalize_hist=False, featurewise_center=False, featurewise_mean=91.6, - net='resnet50', batch_size=64, patch_size=256, stride=64, - exam_tsv='./metadata/exams_metadata.tsv', - img_tsv='./metadata/images_crosswalk.tsv', - validation_mode=False, use_mean=False, - out_pred='./output/predictions.tsv', - progress='./progress.txt'): - '''Run SC1 inference using prob heatmaps - ''' - # Read some env variables. - random_seed = int(os.getenv('RANDOM_SEED', 12345)) - rng = np.random.RandomState(random_seed) # an rng used across board. - gpu_count = int(os.getenv('NUM_GPU_DEVICES', 1)) - - # Setup data generator for inference. - meta_man = DMMetaManager( - img_tsv=img_tsv, exam_tsv=exam_tsv, img_folder=img_folder, - img_extension=img_extension) - if validation_mode: - exam_list = meta_man.get_flatten_exam_list(cc_mlo_only=True) - exam_labs = meta_man.exam_labs(exam_list) - exam_labs = np.array(exam_labs) - print "positive exams=%d, negative exams=%d" \ - % ((exam_labs==1).sum(), (exam_labs==0).sum()) - sys.stdout.flush() - else: - exam_list = meta_man.get_last_exam_list(cc_mlo_only=True) - exam_labs = None - - # Load DL model and classifiers. - print "Load patch classifier:", dl_state; sys.stdout.flush() - dl_model = load_model(dl_state) - if gpu_count > 1: - print "Make the model parallel on %d GPUs" % (gpu_count) - sys.stdout.flush() - dl_model, _ = make_parallel(dl_model, gpu_count) - parallelized = True - else: - parallelized = False - feature_name, nb_phm, cutoff_list, k, clf_list = \ - pickle.load(open(clf_info_state)) - - # Load preprocess function. - if featurewise_center: - preprocess_input = None - else: - print "Load preprocess function for net:", net - if net == 'resnet50': - from keras.applications.resnet50 import preprocess_input - elif net == 'vgg16': - from keras.applications.vgg16 import preprocess_input - elif net == 'vgg19': - from keras.applications.vgg19 import preprocess_input - elif net == 'xception': - from keras.applications.xception import preprocess_input - elif net == 'inception': - from keras.applications.inception_v3 import preprocess_input - else: - raise Exception("Pretrained model is not available: " + net) - - # Print header. 
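The exam loop below converts every view into a probability heatmap via get_prob_heatmap (defined in dm_image, not shown in this diff). The patch_size and stride arguments indicate a sliding-window scheme; a rough sketch, assuming a single-channel image that is already resized and intensity-rescaled:

import numpy as np

def prob_heatmap_sketch(img, patch_size, stride, model, batch_size=64):
    ys = range(0, img.shape[0] - patch_size + 1, stride)
    xs = range(0, img.shape[1] - patch_size + 1, stride)
    patches = np.stack([img[y:y + patch_size, x:x + patch_size]
                        for y in ys for x in xs])
    # channels-last tensor for the Keras patch classifier (an assumption)
    probs = model.predict(patches[..., np.newaxis], batch_size=batch_size)
    # one probability vector per grid cell, e.g. 3 classes for the
    # '3cls' patch models referenced by the shell scripts
    return probs.reshape(len(ys), len(xs), -1)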
- fout = open(out_pred, 'w') - if validation_mode: - fout.write(dminfer.INFER_HEADER_VAL) - else: - fout.write(dminfer.INFER_HEADER) - - print "Start inference for exam list" - sys.stdout.flush() - for i,e in enumerate(exam_list): - ### DEBUG ### - # if i >= 3: - # break - ### DEBUG ### - subj = e[0] - exam_idx = e[1] - if validation_mode: - left_cancer = e[2]['L']['cancer'] - right_cancer = e[2]['R']['cancer'] - left_cancer = 0 if np.isnan(left_cancer) else left_cancer - right_cancer = 0 if np.isnan(right_cancer) else right_cancer - try: - left_cc_phms = get_prob_heatmap( - e[2]['L']['CC'], img_height, img_scale, patch_size, stride, - dl_model, batch_size, featurewise_center=featurewise_center, - featurewise_mean=featurewise_mean, preprocess=preprocess_input, - parallelized=parallelized, equalize_hist=equalize_hist) - except: - left_cc_phms = [None] - try: - left_mlo_phms = get_prob_heatmap( - e[2]['L']['MLO'], img_height, img_scale, patch_size, stride, - dl_model, batch_size, featurewise_center=featurewise_center, - featurewise_mean=featurewise_mean, preprocess=preprocess_input, - parallelized=parallelized, equalize_hist=equalize_hist) - except: - left_mlo_phms = [None] - try: - right_cc_phms = get_prob_heatmap( - e[2]['R']['CC'], img_height, img_scale, patch_size, stride, - dl_model, batch_size, featurewise_center=featurewise_center, - featurewise_mean=featurewise_mean, preprocess=preprocess_input, - parallelized=parallelized, equalize_hist=equalize_hist) - except: - right_cc_phms = [None] - try: - right_mlo_phms = get_prob_heatmap( - e[2]['R']['MLO'], img_height, img_scale, patch_size, stride, - dl_model, batch_size, featurewise_center=featurewise_center, - featurewise_mean=featurewise_mean, preprocess=preprocess_input, - parallelized=parallelized, equalize_hist=equalize_hist) - except: - right_mlo_phms = [None] - try: - left_pred = dminfer.make_pred_case( - left_cc_phms, left_mlo_phms, feature_name, cutoff_list, clf_list, - k=k, nb_phm=nb_phm, use_mean=use_mean) - except: - print "Exception in predicting left breast" + \ - " for subj:", subj, "exam:", exam_idx - sys.stdout.flush() - left_pred = 0. - try: - right_pred = dminfer.make_pred_case( - right_cc_phms, right_mlo_phms, feature_name, cutoff_list, clf_list, - k=k, nb_phm=nb_phm, use_mean=use_mean) - except: - print "Exception in predicting right breast" + \ - " for subj:", subj, "exam:", exam_idx - sys.stdout.flush() - right_pred = 0. - if validation_mode: - fout.write("%s\t%s\tL\t%f\t%f\n" % \ - (str(subj), str(exam_idx), left_pred, left_cancer)) - fout.write("%s\t%s\tR\t%f\t%f\n" % \ - (str(subj), str(exam_idx), right_pred, right_cancer)) - fout.flush() - else: - fout.write("%s\tL\t%f\n" % (str(subj), left_pred)) - fout.write("%s\tR\t%f\n" % (str(subj), right_pred)) - fout.flush() - print "processed %d/%d exams" % (i+1, len(exam_list)) - sys.stdout.flush() - with open(progress, 'w') as fpro: - fpro.write("%f\n" % ( (i + 1.)/len(exam_list)) ) - print "Done." - fout.close() - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description="DM SC1 inference") - parser.add_argument("img_folder", type=str) - parser.add_argument("dl_state", type=str) - parser.add_argument("clf_info_state", type=str) - parser.add_argument("--img-extension", "-ext", dest="img_extension", type=str, default="dcm") - parser.add_argument("--img-height", dest="img_height", type=int, default=4096) - parser.add_argument("--img-scale", dest="img_scale", type=float, default=255.) 
- parser.add_argument("--equalize-hist", dest="equalize_hist", action="store_true") - parser.add_argument("--no-equalize-hist", dest="equalize_hist", action="store_false") - parser.set_defaults(equalize_hist=False) - parser.add_argument("--featurewise-center", dest="featurewise_center", action="store_true") - parser.add_argument("--no-featurewise-center", dest="featurewise_center", action="store_false") - parser.set_defaults(featurewise_center=True) - parser.add_argument("--featurewise-mean", dest="featurewise_mean", type=float, default=91.6) - parser.add_argument("--net", dest="net", type=str, default="resnet50") - parser.add_argument("--batch-size", dest="batch_size", type=int, default=64) - parser.add_argument("--patch-size", dest="patch_size", type=int, default=256) - parser.add_argument("--stride", dest="stride", type=int, default=64) - parser.add_argument("--img-tsv", dest="img_tsv", type=str, default="./metadata/images_crosswalk.tsv") - parser.add_argument("--exam-tsv", dest="exam_tsv", type=str) - parser.add_argument("--no-exam-tsv", dest="exam_tsv", action="store_const", const=None) - parser.set_defaults(exam_tsv=None) - parser.add_argument("--validation-mode", dest="validation_mode", action="store_true") - parser.add_argument("--no-validation-mode", dest="validation_mode", action="store_false") - parser.set_defaults(validation_mode=False) - parser.add_argument("--use-mean", dest="use_mean", action="store_true") - parser.add_argument("--no-use-mean", dest="use_mean", action="store_false") - parser.set_defaults(use_mean=False) - parser.add_argument("--out-pred", dest="out_pred", type=str, default="./output/predictions.tsv") - parser.add_argument("--progress", dest="progress", type=str, default="./progress.txt") - - args = parser.parse_args() - run_opts = dict( - img_extension=args.img_extension, - img_height=args.img_height, - img_scale=args.img_scale, - equalize_hist=args.equalize_hist, - featurewise_center=args.featurewise_center, - featurewise_mean=args.featurewise_mean, - net=args.net, - batch_size=args.batch_size, - patch_size=args.patch_size, - stride=args.stride, - img_tsv=args.img_tsv, - exam_tsv=args.exam_tsv, - validation_mode=args.validation_mode, - use_mean=args.use_mean, - out_pred=args.out_pred, - progress=args.progress - ) - print "\n>>> Inference options: <<<\n", run_opts, "\n" - run(args.img_folder, args.dl_state, args.clf_info_state, **run_opts) - - - - - - - - - - - - diff --git a/inference/dm_sc2_infer.py b/inference/dm_sc2_infer.py deleted file mode 100644 index ca5f746..0000000 --- a/inference/dm_sc2_infer.py +++ /dev/null @@ -1,188 +0,0 @@ -import argparse -import pickle -import numpy as np -import pandas as pd -from keras.models import load_model -import xgboost as xgb -from meta import DMMetaManager -from dm_image import DMImageDataGenerator -from dm_enet import MultiViewDLElasticNet -import dm_inference as dminfer - -import warnings -import exceptions -warnings.filterwarnings('ignore', category=exceptions.UserWarning) - - -def run(img_folder, img_size=[288, 224], do_featurewise_norm=True, - featurewise_mean=485.9, featurewise_std=765.2, - img_tsv='./metadata/images_crosswalk.tsv', - exam_tsv='./metadata/exams_metadata.tsv', - dl_state=None, - enet_state=None, - xgb_state=None, - validation_mode=False, use_mean=False, - out_pred='./output/predictions.tsv'): - '''Run SC2 inference - Args: - featurewise_mean, featurewise_std ([float]): they are estimated from - 1152 x 896 images. Using different sized images give very close - results. 
For png, mean=7772, std=12187. - ''' - - # Setup data generator for inference. - meta_man = DMMetaManager( - img_tsv=img_tsv, exam_tsv=exam_tsv, img_folder=img_folder, - img_extension='dcm') - last2_exgen = meta_man.last_2_exam_generator() - if do_featurewise_norm: - img_gen = DMImageDataGenerator(featurewise_center=True, - featurewise_std_normalization=True) - img_gen.mean = featurewise_mean - img_gen.std = featurewise_std - else: - img_gen = DMImageDataGenerator(samplewise_center=True, - samplewise_std_normalization=True) - if validation_mode: - class_mode = 'binary' - else: - class_mode = None - - # Image prediction model. - if enet_state is not None: - model = MultiViewDLElasticNet(*enet_state) - elif dl_state is not None: - model = load_model(dl_state) - else: - raise Exception('At least one image model state must be specified.') - - # XGB model. - xgb_clf = pickle.load(open(xgb_state)) - - # Print header. - fout = open(out_pred, 'w') - if validation_mode: - fout.write(dminfer.INFER_HEADER_VAL) - else: - fout.write(dminfer.INFER_HEADER) - - # Loop through all last 2 exam pairs. - for subj_id, curr_idx, curr_dat, prior_idx, prior_dat in last2_exgen: - # Get meta info for both breasts. - left_record, right_record = meta_man.get_info_exam_pair( - curr_dat, prior_dat) - nb_days = left_record['daysSincePreviousExam'] - - # Get image data and make predictions. - exam_list = [] - exam_list.append( (subj_id, curr_idx, - meta_man.get_info_per_exam(curr_dat)) ) - if prior_idx is not None: - exam_list.append( (subj_id, prior_idx, - meta_man.get_info_per_exam(prior_dat)) ) - datgen_exam = img_gen.flow_from_exam_list( - exam_list, target_size=(img_size[0], img_size[1]), - class_mode=class_mode, prediction_mode=True, - batch_size=len(exam_list), verbose=False) - ebat = next(datgen_exam) - if class_mode is not None: - bat_x = ebat[0] - bat_y = ebat[1] - else: - bat_x = ebat - cc_batch = bat_x[2] - mlo_batch = bat_x[3] - curr_left_score = dminfer.pred_2view_img_list( - cc_batch[0], mlo_batch[0], model, use_mean) - curr_right_score = dminfer.pred_2view_img_list( - cc_batch[1], mlo_batch[1], model, use_mean) - if prior_idx is not None: - prior_left_score = dminfer.pred_2view_img_list( - cc_batch[2], mlo_batch[2], model, use_mean) - prior_right_score = dminfer.pred_2view_img_list( - cc_batch[3], mlo_batch[3], model, use_mean) - diff_left_score = (curr_left_score - prior_left_score)/nb_days*365 - diff_right_score = (curr_right_score - prior_right_score)/nb_days*365 - else: - prior_left_score = np.nan - prior_right_score = np.nan - diff_left_score = np.nan - diff_right_score = np.nan - - # Merge image scores into meta info. - left_record = left_record\ - .assign(curr_score=curr_left_score)\ - .assign(prior_score=prior_left_score)\ - .assign(diff_score=diff_left_score) - right_record = right_record\ - .assign(curr_score=curr_right_score)\ - .assign(prior_score=prior_right_score)\ - .assign(diff_score=diff_right_score) - dsubj = xgb.DMatrix(pd.concat([left_record, right_record], - ignore_index=True)) - - # Predict using XGB. - pred = xgb_clf.predict(dsubj, ntree_limit=xgb_clf.best_ntree_limit) - - # Output. 
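The ntree_limit=xgb_clf.best_ntree_limit argument above only works if the booster was trained with early stopping, which is what sets best_ntree_limit. A sketch of how such a model is typically produced (the training code is not in this diff; X_train, y_train, X_val, y_val, and X_test are hypothetical):

import xgboost as xgb

dtrain = xgb.DMatrix(X_train, label=y_train)
dval = xgb.DMatrix(X_val, label=y_val)
params = {'objective': 'binary:logistic', 'eval_metric': 'auc'}
bst = xgb.train(params, dtrain, num_boost_round=1000,
                evals=[(dval, 'val')], early_stopping_rounds=50)
# after early stopping, best_ntree_limit holds the best round count
pred = bst.predict(xgb.DMatrix(X_test), ntree_limit=bst.best_ntree_limit)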
- if validation_mode: - fout.write("%s\t%s\tL\t%f\t%f\n" % \ - (str(subj_id), str(curr_idx), pred[0], bat_y[0])) - fout.write("%s\t%s\tR\t%f\t%f\n" % \ - (str(subj_id), str(curr_idx), pred[1], bat_y[1])) - else: - fout.write("%s\tL\t%f\n" % (str(subj_id), pred[0])) - fout.write("%s\tR\t%f\n" % (str(subj_id), pred[1])) - - - fout.close() - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description="DM SC2 inference") - parser.add_argument("img_folder", type=str) - parser.add_argument("--img-size", "-is", dest="img_size", nargs=2, type=int, - default=[288, 224]) - parser.add_argument("--featurewise-norm", dest="do_featurewise_norm", action="store_true") - parser.add_argument("--no-featurewise-norm", dest="do_featurewise_norm", action="store_false") - parser.set_defaults(do_featurewise_norm=True) - parser.add_argument("--featurewise-mean", "-feam", dest="featurewise_mean", - type=float, default=485.9) - parser.add_argument("--featurewise-std", "-feas", dest="featurewise_std", - type=float, default=765.2) - parser.add_argument("--img-tsv", "-it", dest="img_tsv", type=str, - default="./metadata/images_crosswalk.tsv") - parser.add_argument("--exam-tsv", "-et", dest="exam_tsv", type=str) - parser.add_argument("--no-exam-tsv", dest="exam_tsv", action="store_const", const=None) - parser.set_defaults(exam_tsv=None) - parser.add_argument("--dl-state", "-ds", dest="dl_state", type=str) - parser.add_argument("--enet-state", "-es", dest="enet_state", nargs=2, type=str) - parser.add_argument("--xgb-state", "-xs", dest="xgb_state", type=str) - parser.add_argument("--validation-mode", dest="validation_mode", action="store_true") - parser.add_argument("--no-validation-mode", dest="validation_mode", action="store_false") - parser.set_defaults(validation_mode=False) - parser.add_argument("--use-mean", dest="use_mean", action="store_true") - parser.add_argument("--no-use-mean", dest="use_mean", action="store_false") - parser.set_defaults(use_mean=False) - parser.add_argument("--out-pred", "-o", dest="out_pred", type=str, - default="./output/predictions.tsv") - - args = parser.parse_args() - run_opts = dict( - img_size=args.img_size, - do_featurewise_norm=args.do_featurewise_norm, - featurewise_mean=args.featurewise_mean, - featurewise_std=args.featurewise_std, - img_tsv=args.img_tsv, - exam_tsv=args.exam_tsv, - dl_state=args.dl_state, - enet_state=args.enet_state, - xgb_state=args.xgb_state, - validation_mode=args.validation_mode, - use_mean=args.use_mean, - out_pred=args.out_pred - ) - print "\n>>> Inference options: <<<\n", run_opts, "\n" - run(args.img_folder, **run_opts) - diff --git a/inference/dm_sc2_phm_infer.py b/inference/dm_sc2_phm_infer.py deleted file mode 100644 index 96132c8..0000000 --- a/inference/dm_sc2_phm_infer.py +++ /dev/null @@ -1,301 +0,0 @@ -import argparse, sys, os -import pickle -import numpy as np -import pandas as pd -from keras.models import load_model -# import xgboost as xgb -from meta import DMMetaManager -# from dm_image import DMImageDataGenerator -import dm_inference as dminfer -from dm_image import get_prob_heatmap -from dm_multi_gpu import make_parallel -import warnings -import exceptions -warnings.filterwarnings('ignore', category=exceptions.UserWarning) - - -def run(img_folder, dl_state, clf_info_state, meta_clf_state, - img_extension='dcm', img_height=4096, img_scale=255., - equalize_hist=False, featurewise_center=False, featurewise_mean=91.6, - net='resnet50', batch_size=64, patch_size=256, stride=64, - exam_tsv='./metadata/exams_metadata.tsv', - 
img_tsv='./metadata/images_crosswalk.tsv', - validation_mode=False, use_mean=False, - out_pred='./output/predictions.tsv', - progress='./progress.txt'): - '''Run SC2 inference based on prob heatmap - ''' - # Read some env variables. - random_seed = int(os.getenv('RANDOM_SEED', 12345)) - rng = np.random.RandomState(random_seed) # an rng used across board. - gpu_count = int(os.getenv('NUM_GPU_DEVICES', 1)) - - # Setup data generator for inference. - meta_man = DMMetaManager( - img_tsv=img_tsv, exam_tsv=exam_tsv, img_folder=img_folder, - img_extension='dcm') - last2_exgen = meta_man.last_2_exam_generator() - last2_exam_list = list(last2_exgen) - - # Load DL model and classifiers. - print "Load patch classifier:", dl_state; sys.stdout.flush() - dl_model = load_model(dl_state) - if gpu_count > 1: - print "Make the model parallel on %d GPUs" % (gpu_count) - sys.stdout.flush() - dl_model, _ = make_parallel(dl_model, gpu_count) - parallelized = True - else: - parallelized = False - feature_name, nb_phm, cutoff_list, k, clf_list = \ - pickle.load(open(clf_info_state)) - meta_model = pickle.load(open(meta_clf_state)) - - # Load preprocess function. - if featurewise_center: - preprocess_input = None - else: - print "Load preprocess function for net:", net - if net == 'resnet50': - from keras.applications.resnet50 import preprocess_input - elif net == 'vgg16': - from keras.applications.vgg16 import preprocess_input - elif net == 'vgg19': - from keras.applications.vgg19 import preprocess_input - elif net == 'xception': - from keras.applications.xception import preprocess_input - elif net == 'inception': - from keras.applications.inception_v3 import preprocess_input - else: - raise Exception("Pretrained model is not available: " + net) - - # Print header. - fout = open(out_pred, 'w') - if validation_mode: - fout.write(dminfer.INFER_HEADER_VAL) - else: - fout.write(dminfer.INFER_HEADER) - - # Loop through all last 2 exam pairs. - for i, (subj_id, curr_idx, curr_dat, prior_idx, prior_dat) in \ - enumerate(last2_exam_list): - # DEBUG - #if i < 23: - # continue - # DEBUG - # Get meta info for both breasts. - left_record, right_record = meta_man.get_info_exam_pair( - curr_dat, prior_dat) - nb_days = left_record['daysSincePreviousExam'] - - # Get image data and make predictions. - current_exam = meta_man.get_info_per_exam(curr_dat, cc_mlo_only=True) - if prior_idx is not None: - prior_exam = meta_man.get_info_per_exam(prior_dat, cc_mlo_only=True) - - if validation_mode: - left_cancer = current_exam['L']['cancer'] - right_cancer = current_exam['R']['cancer'] - left_cancer = 0 if np.isnan(left_cancer) else left_cancer - right_cancer = 0 if np.isnan(right_cancer) else right_cancer - - # Get prob heatmaps. 
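The heatmaps computed below are later reduced by dminfer.make_pred_case to features scored by clf_list. dm_region.prob_heatmap_features (imported by dm_sc1_phm_infer.py above) is not in this diff; given the cutoff_list and k parameters, a plausible extractor keeps the top-k connected regions above each probability cutoff, something like:

import numpy as np
from skimage import measure

def region_features_sketch(phm, cutoff, k=2):
    # connected regions where malignancy probability exceeds the cutoff
    props = measure.regionprops(measure.label(phm > cutoff),
                                intensity_image=phm)
    top = sorted(props, key=lambda r: r.area, reverse=True)[:k]
    feats = [(r.area, r.mean_intensity, r.max_intensity) for r in top]
    feats += [(0., 0., 0.)] * (k - len(feats))   # pad when few regions exist
    return np.array(feats).ravel()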
- try: - left_cc_phms = get_prob_heatmap( - current_exam['L']['CC'], img_height, img_scale, patch_size, stride, - dl_model, batch_size, featurewise_center=featurewise_center, - featurewise_mean=featurewise_mean, preprocess=preprocess_input, - parallelized=parallelized, equalize_hist=equalize_hist) - except: - left_cc_phms = [None] - try: - left_mlo_phms = get_prob_heatmap( - current_exam['L']['MLO'], img_height, img_scale, patch_size, stride, - dl_model, batch_size, featurewise_center=featurewise_center, - featurewise_mean=featurewise_mean, preprocess=preprocess_input, - parallelized=parallelized, equalize_hist=equalize_hist) - except: - left_mlo_phms = [None] - try: - right_cc_phms = get_prob_heatmap( - current_exam['R']['CC'], img_height, img_scale, patch_size, stride, - dl_model, batch_size, featurewise_center=featurewise_center, - featurewise_mean=featurewise_mean, preprocess=preprocess_input, - parallelized=parallelized, equalize_hist=equalize_hist) - except: - right_cc_phms = [None] - try: - right_mlo_phms = get_prob_heatmap( - current_exam['R']['MLO'], img_height, img_scale, patch_size, stride, - dl_model, batch_size, featurewise_center=featurewise_center, - featurewise_mean=featurewise_mean, preprocess=preprocess_input, - parallelized=parallelized, equalize_hist=equalize_hist) - except: - right_mlo_phms = [None] - #import pdb; pdb.set_trace() - try: - curr_left_pred = dminfer.make_pred_case( - left_cc_phms, left_mlo_phms, feature_name, cutoff_list, clf_list, - k=k, nb_phm=nb_phm, use_mean=use_mean) - except: - curr_left_pred = np.nan - try: - curr_right_pred = dminfer.make_pred_case( - right_cc_phms, right_mlo_phms, feature_name, cutoff_list, clf_list, - k=k, nb_phm=nb_phm, use_mean=use_mean) - except: - curr_right_pred = np.nan - - if prior_idx is not None: - try: - left_cc_phms = get_prob_heatmap( - prior_exam['L']['CC'], img_height, img_scale, patch_size, stride, - dl_model, batch_size, featurewise_center=featurewise_center, - featurewise_mean=featurewise_mean, preprocess=preprocess_input, - parallelized=parallelized, equalize_hist=equalize_hist) - except: - left_cc_phms = [None] - try: - left_mlo_phms = get_prob_heatmap( - prior_exam['L']['MLO'], img_height, img_scale, patch_size, stride, - dl_model, batch_size, featurewise_center=featurewise_center, - featurewise_mean=featurewise_mean, preprocess=preprocess_input, - parallelized=parallelized, equalize_hist=equalize_hist) - except: - left_mlo_phms = [None] - try: - right_cc_phms = get_prob_heatmap( - prior_exam['R']['CC'], img_height, img_scale, patch_size, stride, - dl_model, batch_size, featurewise_center=featurewise_center, - featurewise_mean=featurewise_mean, preprocess=preprocess_input, - parallelized=parallelized, equalize_hist=equalize_hist) - except: - right_cc_phms = [None] - try: - right_mlo_phms = get_prob_heatmap( - prior_exam['R']['MLO'], img_height, img_scale, patch_size, stride, - dl_model, batch_size, featurewise_center=featurewise_center, - featurewise_mean=featurewise_mean, preprocess=preprocess_input, - parallelized=parallelized, equalize_hist=equalize_hist) - except: - right_mlo_phms = [None] - try: - prior_left_pred = dminfer.make_pred_case( - left_cc_phms, left_mlo_phms, feature_name, cutoff_list, clf_list, - k=k, nb_phm=nb_phm, use_mean=use_mean) - except: - prior_left_pred = np.nan - try: - prior_right_pred = dminfer.make_pred_case( - right_cc_phms, right_mlo_phms, feature_name, cutoff_list, clf_list, - k=k, nb_phm=nb_phm, use_mean=use_mean) - except: - prior_right_pred = np.nan - try: - diff_left_pred = 
(curr_left_pred - prior_left_pred)/nb_days*365 - except: - diff_left_pred = np.nan - try: - diff_right_pred = (curr_right_pred - prior_right_pred)/nb_days*365 - except: - diff_right_pred = np.nan - else: - prior_left_pred = np.nan - prior_right_pred = np.nan - diff_left_pred = np.nan - diff_right_pred = np.nan - - try: - # Merge image scores into meta info. - left_record = left_record\ - .assign(curr_score=curr_left_pred)\ - .assign(prior_score=prior_left_pred)\ - .assign(diff_score=diff_left_pred) - right_record = right_record\ - .assign(curr_score=curr_right_pred)\ - .assign(prior_score=prior_right_pred)\ - .assign(diff_score=diff_right_pred) - dsubj = pd.concat([left_record, right_record], ignore_index=True) - # Predict using meta classifier. - pred = meta_model.predict_proba(dsubj)[:,1] - except: - pred = [0., 0.] - - # Output. - if validation_mode: - fout.write("%s\t%s\tL\t%f\t%f\n" % \ - (str(subj_id), str(curr_idx), pred[0], left_cancer)) - fout.write("%s\t%s\tR\t%f\t%f\n" % \ - (str(subj_id), str(curr_idx), pred[1], right_cancer)) - fout.flush() - else: - fout.write("%s\tL\t%f\n" % (str(subj_id), pred[0])) - fout.write("%s\tR\t%f\n" % (str(subj_id), pred[1])) - fout.flush() - - print "processed %d/%d exams" % (i+1, len(last2_exam_list)) - sys.stdout.flush() - with open(progress, 'w') as fpro: - fpro.write("%f\n" % ( (i + 1.)/len(last2_exam_list)) ) - - print "Done." - fout.close() - - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description="DM SC2 inference") - parser.add_argument("img_folder", type=str) - parser.add_argument("dl_state", type=str) - parser.add_argument("clf_info_state", type=str) - parser.add_argument("meta_clf_state", type=str) - parser.add_argument("--img-extension", "-ext", dest="img_extension", type=str, default="dcm") - parser.add_argument("--img-height", dest="img_height", type=int, default=4096) - parser.add_argument("--img-scale", dest="img_scale", type=float, default=255.) 
- parser.add_argument("--equalize-hist", dest="equalize_hist", action="store_true") - parser.add_argument("--no-equalize-hist", dest="equalize_hist", action="store_false") - parser.set_defaults(equalize_hist=False) - parser.add_argument("--featurewise-center", dest="featurewise_center", action="store_true") - parser.add_argument("--no-featurewise-center", dest="featurewise_center", action="store_false") - parser.set_defaults(featurewise_center=True) - parser.add_argument("--featurewise-mean", dest="featurewise_mean", type=float, default=91.6) - parser.add_argument("--net", dest="net", type=str, default="resnet50") - parser.add_argument("--batch-size", dest="batch_size", type=int, default=64) - parser.add_argument("--patch-size", dest="patch_size", type=int, default=256) - parser.add_argument("--stride", dest="stride", type=int, default=64) - parser.add_argument("--img-tsv", dest="img_tsv", type=str, default="./metadata/images_crosswalk.tsv") - parser.add_argument("--exam-tsv", dest="exam_tsv", type=str) - parser.add_argument("--no-exam-tsv", dest="exam_tsv", action="store_const", const=None) - parser.set_defaults(exam_tsv=None) - parser.add_argument("--validation-mode", dest="validation_mode", action="store_true") - parser.add_argument("--no-validation-mode", dest="validation_mode", action="store_false") - parser.set_defaults(validation_mode=False) - parser.add_argument("--use-mean", dest="use_mean", action="store_true") - parser.add_argument("--no-use-mean", dest="use_mean", action="store_false") - parser.set_defaults(use_mean=False) - parser.add_argument("--out-pred", dest="out_pred", type=str, default="./output/predictions.tsv") - parser.add_argument("--progress", dest="progress", type=str, default="./progress.txt") - - args = parser.parse_args() - run_opts = dict( - img_extension=args.img_extension, - img_height=args.img_height, - img_scale=args.img_scale, - equalize_hist=args.equalize_hist, - featurewise_center=args.featurewise_center, - featurewise_mean=args.featurewise_mean, - net=args.net, - batch_size=args.batch_size, - patch_size=args.patch_size, - stride=args.stride, - img_tsv=args.img_tsv, - exam_tsv=args.exam_tsv, - validation_mode=args.validation_mode, - use_mean=args.use_mean, - out_pred=args.out_pred, - progress=args.progress - ) - print "\n>>> Inference options: <<<\n", run_opts, "\n" - run(args.img_folder, args.dl_state, args.clf_info_state, - args.meta_clf_state, **run_opts) diff --git a/inference/getAUC.R b/inference/getAUC.R deleted file mode 100644 index 79cf4fb..0000000 --- a/inference/getAUC.R +++ /dev/null @@ -1,20 +0,0 @@ -# This is the original script from DM organizers. 
- -if (!require(pROC)) { - install.packages("pROC") -} - -library(pROC) -## These functions assume that the gold standard data and the predictions -## have already been matched - -## computes AUC and partial AUC focusing on sensitivity -## -#Assume label and prediction are matched -GetScores <- function(label, prediction, sensitivityRange = c(0.8, 1)) { - roc1 <- roc(label, prediction, direction = "<") - AUC <- auc(roc1)[1] - pAUCse <- auc(roc1, partial.auc = sensitivityRange, partial.auc.focus = "sensitivity", partial.auc.correct = FALSE)[1] - SpecAtSens <- coords(roc1, sensitivityRange[1], input = "sensitivity", ret = "specificity") - list(AUC = AUC, pAUCse = pAUCse, SpecAtSens = SpecAtSens) -} diff --git a/inference/get_exam_df.py b/inference/get_exam_df.py deleted file mode 100644 index 6a7eb02..0000000 --- a/inference/get_exam_df.py +++ /dev/null @@ -1,8 +0,0 @@ -from meta import DMMetaManager - -man = DMMetaManager(img_tsv='/metadata/images_crosswalk.tsv', - exam_tsv='/metadata/exams_metadata.tsv', - img_folder='/trainingData', - img_extension='dcm') -df = man.get_exam_df() -df.to_pickle('/modelState/exam_df.pkl') diff --git a/inference/get_exam_df.sh b/inference/get_exam_df.sh deleted file mode 100755 index 12b4b2b..0000000 --- a/inference/get_exam_df.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -export PYTHONPATH=/:$PYTHONPATH - -python get_exam_df.py diff --git a/inference/sc1_candidROI_infer.sh b/inference/sc1_candidROI_infer.sh deleted file mode 100755 index 16e2400..0000000 --- a/inference/sc1_candidROI_infer.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -export PYTHONPATH=/:$PYTHONPATH -# IMG_CW_TSV="./metadata/images_crosswalk_prediction.tsv" -IMG_CW_TSV="/metadata/images_crosswalk.tsv" -IMG_FOLDER="/inferenceData" -DL_STATE="/resnet50_candidROI_local_bestAuc_model2.h5" -OUT_PRED="/output/predictions.tsv" - -python dm_sc1_candidROI_infer.py \ - --img-height 1024 \ - --img-scale 4095 \ - --roi-per-img 64 \ - --roi-size 256 256 \ - --low-int-threshold 0.05 \ - --blob-min-area 3 \ - --blob-min-int 0.5 \ - --blob-max-int 0.85 \ - --blob-th-step 10 \ - --featurewise-norm \ - --featurewise-mean 915.5 \ - --featurewise-std 735.1 \ - --img-tsv $IMG_CW_TSV \ - --dl-state $DL_STATE \ - --dl-bs 64 \ - --no-validation-mode \ - --out-pred $OUT_PRED \ - $IMG_FOLDER - diff --git a/inference/sc1_candidROI_infer_local.sh b/inference/sc1_candidROI_infer_local.sh deleted file mode 100755 index 4e2da79..0000000 --- a/inference/sc1_candidROI_infer_local.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -# IMG_CW_TSV="./metadata/images_crosswalk_prediction.tsv" -IMG_CW_TSV="./metadata/images_crosswalk.tsv" -EXAM_TSV="./metadata/exams_metadata.tsv" -IMG_FOLDER="./inferenceData" -DL_STATE="./modelState/resnet50_candidROI_local_bestAuc_model.h5" -OUT_PRED="./output/predictions.tsv" - -python dm_sc1_candidROI_infer.py \ - --img-height 1024 \ - --img-scale 4095 \ - --roi-per-img 32 \ - --roi-size 256 256 \ - --low-int-threshold 0.05 \ - --blob-min-area 3 \ - --blob-min-int 0.5 \ - --blob-max-int 0.85 \ - --blob-th-step 10 \ - --featurewise-norm \ - --featurewise-mean 915.5 \ - --featurewise-std 735.1 \ - --img-tsv $IMG_CW_TSV \ - --exam-tsv $EXAM_TSV \ - --dl-state $DL_STATE \ - --dl-bs 32 \ - --validation-mode \ - --out-pred $OUT_PRED \ - $IMG_FOLDER - -Rscript ./calcAUC.R $OUT_PRED diff --git a/inference/sc1_infer.sh b/inference/sc1_infer.sh deleted file mode 100755 index 42e4faa..0000000 --- a/inference/sc1_infer.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -export PYTHONPATH=/:$PYTHONPATH 
-IMG_CW_TSV="/metadata/images_crosswalk.tsv" -IMG_FOLDER="/inferenceData" -SAVED_STATE="/resnet50_288_bestAuc_model.h5" -OUT_PRED="/output/predictions.tsv" - -echo -n "Start inference: " && date -echo - -python dm_sc1_infer.py \ - --img-size 288 224 \ - --img-tsv $IMG_CW_TSV \ - --featurewise-norm \ - --featurewise-mean 485.9 \ - --featurewise-std 765.2 \ - --batch-size 32 \ - --saved-state $SAVED_STATE \ - --no-validation-mode \ - --no-use-mean \ - --out-pred $OUT_PRED \ - $IMG_FOLDER - -echo -echo -n "End inference: " && date -echo "Print number of lines of the prediction table:" -wc -l $OUT_PRED -echo "Print head of the predictions:" -head $OUT_PRED -echo "===============================" -echo "Print tail of the predictions:" -tail $OUT_PRED diff --git a/inference/sc1_infer_local.sh b/inference/sc1_infer_local.sh deleted file mode 100755 index 70bbbd6..0000000 --- a/inference/sc1_infer_local.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -# export PYTHONPATH=/:$PYTHONPATH -IMG_CW_TSV="./metadata/images_crosswalk_prediction.tsv" -IMG_FOLDER="./inferenceData" -SAVED_STATE="./modelState/2017-01-13_resnet50_288/resnet50_288_bestAuc_model.h5" - -# echo -n "Start training: " && date -# echo - -python dm_sc1_infer.py \ - --img-size 288 224 \ - --img-tsv $IMG_CW_TSV \ - --featurewise-norm \ - --featurewise-mean 485.9 \ - --featurewise-std 765.2 \ - --batch-size 32 \ - --saved-state $SAVED_STATE \ - --validation-mode \ - --use-mean \ - --out-pred ./output/predictions.tsv \ - $IMG_FOLDER - -# echo -# echo -n "End training: " && date diff --git a/inference/sc1_phm_infer.sh b/inference/sc1_phm_infer.sh deleted file mode 100755 index 3568de8..0000000 --- a/inference/sc1_phm_infer.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -export PYTHONPATH=/:$PYTHONPATH -IMG_CW_TSV="/metadata/images_crosswalk.tsv" -IMG_FOLDER="/inferenceData" -IMG_EXT="dcm" -DL_STATE="/3cls_best_model5_finetuned.h5" -CLF_INFO_STATE="/model5_ftu_clf_info.pkl" -OUT="/output/predictions.tsv" -PROGRESS="/progress.txt" - -# echo -n "Start training: " && date -# echo - -python dm_sc1_phm_infer.py \ - --img-extension $IMG_EXT \ - --img-height 4096 \ - --img-scale 255.0 \ - --equalize-hist \ - --featurewise-center \ - --featurewise-mean 91.6 \ - --net resnet50 \ - --batch-size 200 \ - --patch-size 256 \ - --stride 128 \ - --no-exam-tsv \ - --img-tsv $IMG_CW_TSV \ - --no-validation-mode \ - --use-mean \ - --out-pred $OUT \ - --progress $PROGRESS \ - $IMG_FOLDER $DL_STATE $CLF_INFO_STATE - -# echo -# echo -n "End training: " && date diff --git a/inference/sc1_phm_infer_local.sh b/inference/sc1_phm_infer_local.sh deleted file mode 100755 index 3929b02..0000000 --- a/inference/sc1_phm_infer_local.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash - -# export PYTHONPATH=/:$PYTHONPATH -IMG_CW_TSV="./metadata/images_crosswalk_1subj.tsv" -# IMG_CW_TSV="./metadata/images_crosswalk.tsv" -# EXAM_TSV="./metadata/exams_metadata.tsv" -IMG_FOLDER="./inferenceData" -# IMG_FOLDER="preprocessedData/jpg_org" -IMG_EXT="dcm" -DL_STATE="modelState/2017-04-19_patch_im4096_256/3cls_best_model5_finetuned.h5" -#CLF_INFO_STATE="./modelState/2017-04-10_patch_im4096_256/model4_ftu_clf_info.pkl" -CLF_INFO_STATE="modelState/2017-04-19_patch_im4096_256/model5_ftu_clf_info.pkl" -OUT="./output/model5_ftu_predictions_1subj.tsv" -PROGRESS="./output/progress.txt" - -echo -n "Start training: " && date -echo - -python dm_sc1_phm_infer.py \ - --img-extension $IMG_EXT \ - --img-height 4096 \ - --img-scale 255.0 \ - --equalize-hist \ - --featurewise-center \ - 
--featurewise-mean 91.6 \ - --net resnet50 \ - --batch-size 64 \ - --patch-size 256 \ - --stride 128 \ - --no-exam-tsv \ - --img-tsv $IMG_CW_TSV \ - --no-validation-mode \ - --use-mean \ - --out-pred $OUT \ - --progress $PROGRESS \ - $IMG_FOLDER $DL_STATE $CLF_INFO_STATE - - -# echo -# echo -n "End training: " && date diff --git a/inference/sc2_infer.sh b/inference/sc2_infer.sh deleted file mode 100755 index af1b83a..0000000 --- a/inference/sc2_infer.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash - -export PYTHONPATH=/:$PYTHONPATH -IMG_CW_TSV="/metadata/images_crosswalk.tsv" -EXAM_TSV="/metadata/exams_metadata.tsv" -IMG_FOLDER="/inferenceData" -# DL_STATE="./modelState/2017-01-13_resnet50_288/resnet50_288_bestAuc_model.h5" -DL_STATE="/resnet50_288_bestAuc_model_4.h5" -ENET_STATE="/enet_288_bestAuc_model.pkl" -XGB_STATE="/bst_288_bestAuc_model.pkl" -OUT_PRED="/output/predictions.tsv" - -echo -n "Start inference: " && date -echo - - # --dl-state $DL_STATE \ -python dm_sc2_infer.py \ - --img-size 288 224 \ - --img-tsv $IMG_CW_TSV \ - --exam-tsv $EXAM_TSV \ - --featurewise-norm \ - --featurewise-mean 485.9 \ - --featurewise-std 765.2 \ - --enet-state $DL_STATE $ENET_STATE \ - --xgb-state $XGB_STATE \ - --no-validation-mode \ - --no-use-mean \ - --out-pred $OUT_PRED \ - $IMG_FOLDER - - -echo -echo -n "End inference: " && date -echo -echo "Print number of lines of the prediction table:" -wc -l $OUT_PRED -echo -echo "Print head of the predictions:" -head $OUT_PRED -echo "===============================" -echo "Print tail of the predictions:" -tail $OUT_PRED diff --git a/inference/sc2_infer_local.sh b/inference/sc2_infer_local.sh deleted file mode 100755 index d4d35f1..0000000 --- a/inference/sc2_infer_local.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -# export PYTHONPATH=/:$PYTHONPATH -IMG_CW_TSV="./metadata/images_crosswalk.tsv" -EXAM_TSV="./metadata/exams_metadata.tsv" -IMG_FOLDER="./inferenceData" -# DL_STATE="./modelState/2017-01-13_resnet50_288/resnet50_288_bestAuc_model.h5" -DL_STATE="./modelState/2017-01-15_resnet50_288_4/resnet50_288_bestAuc_model_4.h5" -ENET_STATE="./modelState/2017-01-18_enet_288/enet_288_bestAuc_model.pkl" -XGB_STATE="modelState/2017-01-24_xgb_288/xgb_2017-01-25-10am/bst_288_bestAuc_model.pkl" -OUT_PRED="./output/predictions.tsv" - - # --dl-state $DL_STATE \ -python dm_sc2_infer.py \ - --img-size 288 224 \ - --img-tsv $IMG_CW_TSV \ - --exam-tsv $EXAM_TSV \ - --featurewise-norm \ - --featurewise-mean 485.9 \ - --featurewise-std 765.2 \ - --enet-state $DL_STATE $ENET_STATE \ - --xgb-state $XGB_STATE \ - --validation-mode \ - --no-use-mean \ - --out-pred $OUT_PRED \ - $IMG_FOLDER - -Rscript ./calcAUC.R $OUT_PRED diff --git a/inference/sc2_phm_infer.sh b/inference/sc2_phm_infer.sh deleted file mode 100755 index 2963a42..0000000 --- a/inference/sc2_phm_infer.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -export PYTHONPATH=/:$PYTHONPATH -IMG_CW_TSV="/metadata/images_crosswalk.tsv" -EXAM_TSV="/metadata/exams_metadata.tsv" -IMG_FOLDER="/inferenceData" -IMG_EXT="dcm" -DL_STATE="/3cls_best_model5_finetuned.h5" -CLF_INFO_STATE="/model5_ftu_clf_info.pkl" -META_CLF_STATE="/model5_ftu_based_meta_clf.pkl" -OUT="/output/predictions.tsv" -PROGRESS="/progress.txt" - -echo -n "Start training: " && date -echo - -python dm_sc2_phm_infer.py \ - --img-extension $IMG_EXT \ - --img-height 4096 \ - --img-scale 255.0 \ - --equalize-hist \ - --featurewise-center \ - --featurewise-mean 91.6 \ - --net resnet50 \ - --batch-size 200 \ - --patch-size 256 \ - --stride 128 \ - --exam-tsv 
$EXAM_TSV \ - --img-tsv $IMG_CW_TSV \ - --no-validation-mode \ - --use-mean \ - --out-pred $OUT \ - --progress $PROGRESS \ - $IMG_FOLDER $DL_STATE $CLF_INFO_STATE $META_CLF_STATE - - -# echo -# echo -n "End training: " && date diff --git a/inference/sc2_phm_infer_local.sh b/inference/sc2_phm_infer_local.sh deleted file mode 100755 index dfb4ed2..0000000 --- a/inference/sc2_phm_infer_local.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -# export PYTHONPATH=/:$PYTHONPATH -# IMG_CW_TSV="./metadata/images_crosswalk_prediction.tsv" -IMG_CW_TSV="./metadata/images_crosswalk.tsv" -EXAM_TSV="./metadata/exams_metadata.tsv" -IMG_FOLDER="./inferenceData" -# IMG_FOLDER="preprocessedData/jpg_org" -IMG_EXT="dcm" -DL_STATE="modelState/2017-04-19_patch_im4096_256/3cls_best_model5_finetuned.h5" -#CLF_INFO_STATE="./modelState/2017-04-10_patch_im4096_256/model4_ftu_clf_info.pkl" -CLF_INFO_STATE="modelState/2017-04-19_patch_im4096_256/model5_ftu_clf_info.pkl" -META_CLF_STATE="modelState/2017-04-19_patch_im4096_256/model5_ftu_based_meta_clf.pkl" -OUT="./output/model5_ftu_predictions_sc2_scratch.tsv" -PROGRESS="./output/progress.txt" - -echo -n "Start training: " && date -echo - -python dm_sc2_phm_infer.py \ - --img-extension $IMG_EXT \ - --img-height 4096 \ - --img-scale 255.0 \ - --equalize-hist \ - --featurewise-center \ - --featurewise-mean 91.6 \ - --net resnet50 \ - --batch-size 64 \ - --patch-size 256 \ - --stride 128 \ - --exam-tsv $EXAM_TSV \ - --img-tsv $IMG_CW_TSV \ - --validation-mode \ - --use-mean \ - --out-pred $OUT \ - --progress $PROGRESS \ - $IMG_FOLDER $DL_STATE $CLF_INFO_STATE $META_CLF_STATE - - -# echo -# echo -n "End training: " && date diff --git a/inference/train_get_score.sh b/inference/train_get_score.sh deleted file mode 100755 index a873078..0000000 --- a/inference/train_get_score.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -export PYTHONPATH=/:$PYTHONPATH -IMG_CW_TSV="/metadata/images_crosswalk.tsv" -EXAM_TSV="/metadata/exams_metadata.tsv" -IMG_FOLDER="/trainingData" -# DL_STATE="./modelState/2017-01-13_resnet50_288/resnet50_288_bestAuc_model.h5" -DL_STATE="/resnet50_288_bestAuc_model_4.h5" -ENET_STATE="/enet_288_bestAuc_model.pkl" -OUT_PRED="/modelState/predictions.tsv" - - # --dl-state $DL_STATE \ -python dm_sc1_infer.py \ - --img-size 288 224 \ - --img-tsv $IMG_CW_TSV \ - --exam-tsv $EXAM_TSV \ - --featurewise-norm \ - --featurewise-mean 485.9 \ - --featurewise-std 765.2 \ - --batch-size 32 \ - --enet-state $DL_STATE $ENET_STATE \ - --validation-mode \ - --no-use-mean \ - --out-pred $OUT_PRED \ - $IMG_FOLDER - -Rscript ./calcAUC.R $OUT_PRED diff --git a/pilot_images_note.txt b/pilot_images_note.txt deleted file mode 100644 index e92bc4f..0000000 --- a/pilot_images_note.txt +++ /dev/null @@ -1,13 +0,0 @@ -121370 artifact on breast -121393 artifact on breast -267618 large bright area on breast -267624 large bright area on breast and there is pectoral muscle -317918 spots on pectoral muscle -327015 pectoral muscle does not have high itensity -327017 pectoral muscle boundary is dark; spot on pectoral muscle -351992 there is microcalcification on nipple? -351994 there is microcalcification on nipple? 
pectoral muscle has complex structure -453532 thick vertical line and strange patterns -453533 thick vertical line and strange patterns -502858 high intensity area in CC view -566508 high intensity area in CC view \ No newline at end of file diff --git a/preprocessing/Dockerfile b/preprocessing/Dockerfile deleted file mode 100644 index 0781be1..0000000 --- a/preprocessing/Dockerfile +++ /dev/null @@ -1,101 +0,0 @@ -FROM ubuntu:16.04 -MAINTAINER Li Shen - -# KEEP UBUNTU OR DEBIAN UP TO DATE -RUN echo "deb http://us.archive.ubuntu.com/ubuntu xenial main multiverse" >> /etc/apt/sources.list -RUN apt-get -y update -RUN apt-get -y install apt-utils -ENV TERM xterm-256color # just to turn off some warnings. -RUN apt-get -y upgrade \ - && apt-get -y autoremove - - -# =================== OpenCV ====================== # -# Build tools. -RUN apt-get install -y build-essential cmake git - -# OpenCV2 development, parallelism, linear algebra. -RUN apt-get install -y libtbb-dev libeigen3-dev -# libopencv-dev - -# 1394 Camera. -RUN apt-get install -y libdc1394-22 libdc1394-22-dev - -# GUI (if you want to use GTK instead of Qt, replace 'qt5-default' with 'libgtkglext1-dev' and remove '-DWITH_QT=ON' option in CMake): -RUN apt-get install -y qt5-default libvtk6-dev libgtk-3-dev qtbase5-dev - -# Media I/O: -RUN apt-get install -y zlib1g-dev libwebp-dev libjasper-dev libopenexr-dev libgdal-dev libavcodec-dev libavformat-dev libfaac-dev libmp3lame-dev libopencore-amrnb-dev libvorbis-dev - -# Video I/O: -RUN apt-get install -y libx264-dev yasm x264 ffmpeg libxine2-dev libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev libv4l-dev v4l-utils libtheora-dev libxvidcore-dev - -# Anaconda: -# WORKDIR / -# RUN wget https://repo.continuum.io/archive/Anaconda2-4.2.0-Linux-x86_64.sh -O anaconda.sh -# RUN bash anaconda.sh -b -p anaconda -# ENV PATH "/anaconda/bin:$PATH" -# RUN conda upgrade --all -y -# RUN echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" - -# Python: -RUN apt-get install -y python-dev python-tk python-numpy python3-dev python3-tk python3-numpy - -# Documentation: -RUN apt-get install -y doxygen - -# Additional packages (Later added when tested on my Ubuntu 16.04 box): -RUN apt-get install -y libavresample-dev libgphoto2-dev -RUN apt-get install -y python-gst-1.0 python-gst0.10 python-gst0.10-dev python3-gst-1.0 - -# Image I/O: -RUN apt-get install -y libjpeg-dev libpng12-dev libswscale-dev -# libtiff5 libtiff5-dev - -# INSTALL THE OpenCV LIBRARY (YOU CAN CHANGE '3.1.0' FOR THE LAST STABLE VERSION) -WORKDIR / -RUN apt-get install -y wget unzip -RUN wget https://github.com/Itseez/opencv/archive/3.1.0.zip -O 3.1.0.zip -RUN unzip 3.1.0.zip \ - && rm 3.1.0.zip \ - && mv opencv-3.1.0 OpenCV -WORKDIR OpenCV -WORKDIR build -RUN cmake -D CMAKE_BUILD_TYPE=Release \ - -D CMAKE_INSTALL_PREFIX=/usr/local \ - -D WITH_TBB=ON -D WITH_V4L=ON -D WITH_QT=ON -D WITH_OPENGL=ON -D WITH_GSTREAMER=OFF \ - -D WITH_TIFF=ON -D BUILD_TIFF=ON \ - -D WITH_OPENMP=ON \ - -D BUILD_EXAMPLES=OFF \ - -D CUDA_NVCC_FLAGS="-D_FORCE_INLINES" \ - .. - # -D CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda \ - # -D PYTHON2_EXECUTABLE=/anaconda/bin/python \ - # -D PYTHON_INCLUDE_DIR=/anaconda/include/python2.7/ \ - # -D PYTHON_LIBRARY=/anaconda/lib/libpython2.7.so \ - # -D PYTHON2_NUMPY_INCLUDE_DIRS=/anaconda/lib/python2.7/site-packages/numpy/core/include/ \ - -# make and install. 
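# [Editor's note, not part of the original Dockerfile] The CUDA_NVCC_FLAGS
# option above ("-D_FORCE_INLINES") is the common workaround for nvcc builds
# failing against glibc >= 2.23 on Ubuntu 16.04 with a "memcpy was not
# declared in this scope" error; it is inert here, since this image configures
# OpenCV without a CUDA toolkit root.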
-RUN make clean && make -j7 && make install -RUN bash -c 'echo "/usr/local/lib" > /etc/ld.so.conf.d/opencv.conf' && ldconfig - -# >>>>> Also need to create a link to cv2.so if anaconda is being used. <<<<<<< -# RUN ln -s /usr/local/lib/python2.7/site-packages/cv2.so /anaconda/lib/python2.7/cv2.so -RUN python -c "import cv2; print cv2.__version__" - - -# ============================================== # -RUN apt-get -y update && apt-get install -y parallel imagemagick - - -# ============================================== # -# Copy processing and training files: -WORKDIR / -COPY preprocess.py ./ -COPY preprocess.sh ./ -# COPY train.sh ./ - - - - - diff --git a/preprocessing/preprocess.py b/preprocessing/preprocess.py deleted file mode 100644 index f0f770a..0000000 --- a/preprocessing/preprocess.py +++ /dev/null @@ -1,45 +0,0 @@ -import cv2 -import argparse -from dm_preprocess import DMImagePreprocessor - - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser() - parser.add_argument('input', type=str, help='input image path') - parser.add_argument('output', type=str, help='output image path') - parser.add_argument('--remove-pectoral', dest='pect', action='store_true', - help='whether to remove the pectoral muscle region or \ - not') - args = parser.parse_args() - # Preprocess the input image and write to an output image. - preprocessor = DMImagePreprocessor() - img_in = cv2.imread(args.input, cv2.IMREAD_GRAYSCALE) - img_out, _ = preprocessor.process(img_in, pect_removal=args.pect) - cv2.imwrite(args.output, img_out) - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/preprocessing/preprocess.sh b/preprocessing/preprocess.sh deleted file mode 100755 index 58606a4..0000000 --- a/preprocessing/preprocess.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -TRAIN_DATA_DIR=/trainingData -CONVERTED_PNG_DIR=/preprocessedData/png_org -RESIZED_PNG_DIR1=/preprocessedData/png_1152x896 -RESIZED_PNG_DIR2=/preprocessedData/png_576x448 -RESIZED_PNG_DIR3=/preprocessedData/png_288x224 -# IMG_TSV=./metadata/images_crosswalk.tsv - -echo "[$(date)] >>> Create folders for converted and processed .png files" -mkdir -p $CONVERTED_PNG_DIR $RESIZED_PNG_DIR1 $RESIZED_PNG_DIR2 $RESIZED_PNG_DIR3 -echo - -echo "[$(date)] >>> Convert .dcm files to .png files" -find $TRAIN_DATA_DIR/ -maxdepth 1 -name '*.dcm' | parallel --no-notice "convert {} $CONVERTED_PNG_DIR/{/.}.png" -echo "[$(date)] Done converted $(find $CONVERTED_PNG_DIR/ -name '*.png'|wc -l) dcm files to png files." -echo - -echo "[$(date)] >>> Resize .png files to 1152x896 (HxW)" -find $CONVERTED_PNG_DIR/ -maxdepth 1 -name '*.png' | parallel --no-notice "convert {} -resize 896x1152! $RESIZED_PNG_DIR1/{/.}.png" -echo "[$(date)] Done resized $(find $RESIZED_PNG_DIR1/ -name '*.png'|wc -l) png files." -echo - -echo "[$(date)] >>> Resize .png files to 576x448 (HxW)" -find $CONVERTED_PNG_DIR/ -maxdepth 1 -name '*.png' | parallel --no-notice "convert {} -resize 448x576! $RESIZED_PNG_DIR2/{/.}.png" -echo "[$(date)] Done resized $(find $RESIZED_PNG_DIR2/ -name '*.png'|wc -l) png files." -echo - -echo "[$(date)] >>> Resize .png files to 288x224 (HxW)" -find $CONVERTED_PNG_DIR/ -maxdepth 1 -name '*.png' | parallel --no-notice "convert {} -resize 224x288! $RESIZED_PNG_DIR3/{/.}.png" -echo "[$(date)] Done resized $(find $RESIZED_PNG_DIR3/ -name '*.png'|wc -l) png files." 
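# [Editor's note, not part of the original script] In ImageMagick geometry the
# argument is WIDTHxHEIGHT and the trailing '!' forces the exact size,
# ignoring the original aspect ratio; hence "-resize 224x288!" yields 288x224
# (HxW) images. A single-file equivalent of one of the parallel jobs above:
#
#   convert input.png -resize 224x288! output.png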
-echo - -# find $TRAIN_DATA_DIR/ -maxdepth 1 -name '*.dcm' | parallel "echo 'convert: {} => $CONVERTED_PNG_DIR/{/.}.png'; convert {} $CONVERTED_PNG_DIR/{/.}.png" - -# echo "[$(date)] >>> Process .png files using the image preprocessor" -# awk 'NR > 1 && $4 == "MLO" {print $6}' $IMG_TSV | parallel "echo 'process MLO: $CONVERTED_PNG_DIR/{/.}.png => $PROCESSED_PNG_DIR/{/.}.png'; python preprocess.py --remove-pectoral $CONVERTED_PNG_DIR/{/.}.png $PROCESSED_PNG_DIR/{/.}.png" -# awk 'NR > 1 && $4 == "MLO" {print $6}' $IMG_TSV | parallel "python preprocess.py --remove-pectoral $CONVERTED_PNG_DIR/{/.}.png $PROCESSED_PNG_DIR/{/.}.png" -# awk 'NR > 1 && $4 != "MLO" {print $6}' $IMG_TSV | parallel "echo 'process non-MLO: $CONVERTED_PNG_DIR/{/.}.png => $PROCESSED_PNG_DIR/{/.}.png'; python preprocess.py $CONVERTED_PNG_DIR/{/.}.png $PROCESSED_PNG_DIR/{/.}.png" -# awk 'NR > 1 && $4 != "MLO" {print $6}' $IMG_TSV | parallel "python preprocess.py $CONVERTED_PNG_DIR/{/.}.png $PROCESSED_PNG_DIR/{/.}.png" -# find $PROCESSED_PNG_DIR -name '*.png' -execdir ls -lh {} \; -# echo "[$(date)] Done processed $(find $PROCESSED_PNG_DIR/ -name '*.png'|wc -l) png files." -# echo diff --git a/preprocessing/preprocess_local.sh b/preprocessing/preprocess_local.sh deleted file mode 100755 index 4907df1..0000000 --- a/preprocessing/preprocess_local.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -TRAIN_DATA_DIR=./trainingData -CONVERTED_PNG_DIR=./preprocessedData/png_org -RESIZED_PNG_DIR1=./preprocessedData/png_1152x896 -RESIZED_PNG_DIR2=./preprocessedData/png_576x448 -RESIZED_PNG_DIR3=./preprocessedData/png_288x224 -# IMG_TSV=./metadata/images_crosswalk.tsv - -echo "[$(date)] >>> Create folders for converted and processed .png files" -mkdir -p $CONVERTED_PNG_DIR $RESIZED_PNG_DIR1 $RESIZED_PNG_DIR2 $RESIZED_PNG_DIR3 -echo - -echo "[$(date)] >>> Convert .dcm files to .png files" -find $TRAIN_DATA_DIR/ -maxdepth 1 -name '*.dcm' | parallel --no-notice "convert {} $CONVERTED_PNG_DIR/{/.}.png" -echo "[$(date)] Done converted $(find $CONVERTED_PNG_DIR/ -name '*.png'|wc -l) dcm files to png files." -echo - -echo "[$(date)] >>> Resize .png files to 1152x896 (HxW)" -find $CONVERTED_PNG_DIR/ -maxdepth 1 -name '*.png' | parallel --no-notice "convert {} -resize 896x1152! $RESIZED_PNG_DIR1/{/.}.png" -echo "[$(date)] Done resized $(find $RESIZED_PNG_DIR1/ -name '*.png'|wc -l) png files." -echo - -echo "[$(date)] >>> Resize .png files to 576x448 (HxW)" -find $CONVERTED_PNG_DIR/ -maxdepth 1 -name '*.png' | parallel --no-notice "convert {} -resize 448x576! $RESIZED_PNG_DIR2/{/.}.png" -echo "[$(date)] Done resized $(find $RESIZED_PNG_DIR2/ -name '*.png'|wc -l) png files." -echo - -echo "[$(date)] >>> Resize .png files to 288x224 (HxW)" -find $CONVERTED_PNG_DIR/ -maxdepth 1 -name '*.png' | parallel --no-notice "convert {} -resize 224x288! $RESIZED_PNG_DIR3/{/.}.png" -echo "[$(date)] Done resized $(find $RESIZED_PNG_DIR3/ -name '*.png'|wc -l) png files." 
-echo - -# find $TRAIN_DATA_DIR/ -maxdepth 1 -name '*.dcm' | parallel "echo 'convert: {} => $CONVERTED_PNG_DIR/{/.}.png'; convert {} $CONVERTED_PNG_DIR/{/.}.png" - -# echo "[$(date)] >>> Process .png files using the image preprocessor" -# awk 'NR > 1 && $4 == "MLO" {print $6}' $IMG_TSV | parallel "echo 'process MLO: $CONVERTED_PNG_DIR/{/.}.png => $PROCESSED_PNG_DIR/{/.}.png'; python preprocess.py --remove-pectoral $CONVERTED_PNG_DIR/{/.}.png $PROCESSED_PNG_DIR/{/.}.png" -# awk 'NR > 1 && $4 == "MLO" {print $6}' $IMG_TSV | parallel "python preprocess.py --remove-pectoral $CONVERTED_PNG_DIR/{/.}.png $PROCESSED_PNG_DIR/{/.}.png" -# awk 'NR > 1 && $4 != "MLO" {print $6}' $IMG_TSV | parallel "echo 'process non-MLO: $CONVERTED_PNG_DIR/{/.}.png => $PROCESSED_PNG_DIR/{/.}.png'; python preprocess.py $CONVERTED_PNG_DIR/{/.}.png $PROCESSED_PNG_DIR/{/.}.png" -# awk 'NR > 1 && $4 != "MLO" {print $6}' $IMG_TSV | parallel "python preprocess.py $CONVERTED_PNG_DIR/{/.}.png $PROCESSED_PNG_DIR/{/.}.png" -# find $PROCESSED_PNG_DIR -name '*.png' -execdir ls -lh {} \; -# echo "[$(date)] Done processed $(find $PROCESSED_PNG_DIR/ -name '*.png'|wc -l) png files." -# echo diff --git a/score_sc1.sh b/score_sc1.sh deleted file mode 100755 index 136715a..0000000 --- a/score_sc1.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -# -# Digital Mammography DREAM Challenge -# Testing inference method - -# Run testing -python DREAM_DM_starter_tf.py --net GoogLe --ms 224 --test 1 --out /output/out_1.txt --pf /scoringData &> /output/out_2.txt diff --git a/test.sh b/test.sh deleted file mode 100755 index 136715a..0000000 --- a/test.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -# -# Digital Mammography DREAM Challenge -# Testing inference method - -# Run testing -python DREAM_DM_starter_tf.py --net GoogLe --ms 224 --test 1 --out /output/out_1.txt --pf /scoringData &> /output/out_2.txt diff --git a/training/Dockerfile b/training/Dockerfile deleted file mode 100644 index 0a43238..0000000 --- a/training/Dockerfile +++ /dev/null @@ -1,56 +0,0 @@ -FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04 -MAINTAINER Li Shen - -# ============== OpenCV2, numpy, scipy =================== # -RUN echo "deb http://us.archive.ubuntu.com/ubuntu xenial main multiverse" >> /etc/apt/sources.list -ENV TERM xterm-256color # just to turn off some warnings. 
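# [Editor's note, not part of the original Dockerfile] '#' only starts a
# comment at the beginning of a line in a Dockerfile; in the space-separated
# ENV form above, the trailing text becomes part of the value, so TERM is
# actually set to "xterm-256color # just to turn off some warnings.".
# Writing ENV TERM=xterm-256color would avoid that.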
-RUN apt-get -y update && \ - apt-get install -y --no-install-recommends python-opencv python-numpy python-scipy && \ - apt-get autoclean && \ - apt-get autoremove -RUN python -c "import cv2; print cv2.__version__" - -# =======================Parallel & convert ==================== # -RUN apt-get -y update && \ - apt-get install -y --no-install-recommends parallel imagemagick && \ - apt-get autoclean && \ - apt-get autoremove - -# ===================== Tensorflow ======================# -ENV PATH /usr/local/cuda/bin${PATH:+:${PATH}} -ENV LD_LIBRARY_PATH /usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} -RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nvidia-367-dev && \ - apt-get autoclean && \ - apt-get autoremove -RUN apt-get install -y --no-install-recommends python-pip python-dev && \ - pip install -U pip && \ - apt-get autoclean && \ - apt-get autoremove -RUN pip install -U tensorflow-gpu - -# ====================== Sklearn ========================# -RUN pip install -U scikit-learn -RUN python -c "import sklearn; print sklearn.__version__" - -# ====================== Keras ==========================# -RUN pip install -U pyyaml six h5py pydot-ng -WORKDIR / -RUN echo "Keras last updated: 2016-12-22" -RUN apt-get install -y --no-install-recommends git && \ - git clone https://github.com/lishen/keras.git -WORKDIR keras -RUN git checkout fix-1-channel-samplewise-std -RUN python setup.py install -RUN python -c "import keras; print keras.__version__" - -# ============================================== # -# Copy processing and training files: -RUN pip install -U pandas pydicom -WORKDIR / -COPY train.sh train_small.sh ./ -COPY temp/*.py dm_resnet_train.py ./ -COPY modelState/2017-01-11_resnet47rb5_576/dmresnet47rb5_576_bestAuc_model.h5 ./ -# VOLUME ["/metadata", "/trainingData", "/preprocessedData", "/modelState", "/scratch"] - - - diff --git a/training/dm_bow_train.py b/training/dm_bow_train.py deleted file mode 100644 index 10fdd82..0000000 --- a/training/dm_bow_train.py +++ /dev/null @@ -1,532 +0,0 @@ -import argparse -import os, sys -import pickle -import numpy as np -from numpy.random import RandomState -from scipy.sparse import lil_matrix -import tensorflow as tf -from sklearn.model_selection import train_test_split -from sklearn.decomposition import PCA -from sklearn.cluster import MiniBatchKMeans -from keras.models import load_model -from dm_enet import DLRepr -from meta import DMMetaManager -from dm_image import DMImageDataGenerator -from dm_keras_ext import DMMetrics as dmm - -import warnings -import exceptions -warnings.filterwarnings('ignore', category=exceptions.UserWarning) - - -def sample_roi_repr(roi_generator, sample_per_batch, nb_samples, repr_model, - batch_size=32, random_seed=12345, q_size=20): - '''Sample candidate ROIs and then extract their DL representations - ''' - samples_seen = 0 - repr_list = [] - roi_q = [] # a queue for candid ROIs before they are scored. 
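    # [Editor's note, not part of the original code] The loop below
    # accumulates sampled ROIs in roi_q and only calls repr_model.predict()
    # once q_size batches have piled up, so the model sees a few large batches
    # instead of many small ones. Reseeding RandomState from samples_seen each
    # iteration keeps the weighted, without-replacement ROI sampling
    # reproducible for a fixed random_seed.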
- while samples_seen < nb_samples: - rng = RandomState(samples_seen + random_seed) - X,w = roi_generator.next() - w /= w.sum() - ri = rng.choice(len(X), sample_per_batch, replace=False, p=w) - roi_q.append(X[ri]) - samples_seen += len(ri) - if len(roi_q) >= q_size: - X_q = np.concatenate(roi_q) - repr_list.append(repr_model.predict(X_q, batch_size=batch_size)) - roi_q = [] - if len(roi_q) > 0: - X_q = np.concatenate(roi_q) - repr_list.append(repr_model.predict(X_q, batch_size=batch_size)) - roi_q = [] - return np.concatenate(repr_list) - - -def get_exam_bow_dat(exam_list, nb_words, roi_per_img, - img_list=None, prob_out=None, clust_list=None, - imgen=None, clf_list=None, transformer=None, **kw_args): - '''Get the BoW count matrix for an exam list - ''' - if img_list is not None: - if prob_out is None or clust_list is None: - raise Exception("When img_list is not None, [prob_out, clust_list]" - " must not be None") - img_idx_tab = dict(zip(img_list, range(len(img_list)))) - elif imgen is None or clf_list is None: - raise Exception("When img_list is None, [imgen, clf_list] must not" - " be None") - else: - pass - - ##################################################### - def get_prob_repr( - case_all_imgs, target_height, target_scale, - img_per_batch, roi_size, - low_int_threshold, blob_min_area, blob_min_int, blob_max_int, - blob_th_step, - seed, dlrepr_model): - '''Get prob and DL representations for all ROIs for all images of a case - ''' - roi_generator = imgen.flow_from_candid_roi( - case_all_imgs, - target_height=target_height, target_scale=target_scale, - class_mode=None, validation_mode=True, - img_per_batch=img_per_batch, roi_per_img=roi_per_img, - roi_size=roi_size, - low_int_threshold=low_int_threshold, blob_min_area=blob_min_area, - blob_min_int=blob_min_int, blob_max_int=blob_max_int, - blob_th_step=blob_th_step, - roi_clf=None, seed=seed) - - pred = dlrepr_model.predict_generator( - roi_generator, val_samples=roi_per_img*len(case_all_imgs)) - # Split representation and prob. - dl_repr = pred[0] - dl_repr = dl_repr.reshape((-1,dl_repr.shape[-1])) # flatten feature maps. - prob_ = pred[1] - if prob_.shape[1] == 3: - prob_ = prob_[:, 1] # cancer class prob. - prob_ = prob_.reshape((len(case_all_imgs),-1)) # img x roi prob - return prob_, dl_repr - - def get_prob_out(case_all_imgs): - iis = np.array([ img_idx_tab[img] for img in case_all_imgs]) - return prob_out[iis] - - def get_clust_labs(case_all_imgs, clust): - iis = np.array([ img_idx_tab[img] for img in case_all_imgs]) - return clust[iis].ravel() # flattened clust labs for all imgs. 
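    # [Editor's note, not part of the original code] get_breast_prob_clust
    # below dispatches between two modes: when img_list is given, per-ROI
    # probabilities and cluster labels were precomputed and are simply looked
    # up per image; otherwise ROIs are generated and scored on the fly via
    # imgen and clf_list (optionally PCA-transformed first). The BoW counting
    # loop further down is shared by both paths.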
- - def get_breast_prob_clust(case_all_imgs): - '''Get prob and clust labs for all codebooks for one breast - ''' - if img_list is not None: - prob_ = get_prob_out(case_all_imgs) - clust_ = [ get_clust_labs(case_all_imgs, clust) - for clust in clust_list] - else: - prob_, roi_repr = get_prob_repr(case_all_imgs, **kw_args) - if transformer is not None: - roi_repr = transformer.transform(roi_repr) - clust_ = [ clf.predict(roi_repr).ravel() - for clf in clf_list] - return prob_, clust_ - ##################################################### - - - bow_list = [ lil_matrix((len(exam_list)*2, n), dtype='uint16') - for n in nb_words] - meta_prob_list = [] - ri = 0 - for subj, exidx, exam in exam_list: - try: - cancerL = int(exam['L']['cancer']) - except ValueError: - cancerL = 0 - try: - cancerR = int(exam['R']['cancer']) - except ValueError: - cancerR = 0 - - try: - probL, clustL = get_breast_prob_clust(exam['L']['img']) - for i,bow in enumerate(bow_list): - for ci in clustL[i]: - bow[ri, ci] += 1 - except KeyError: # unimaged breast. - # import pdb; pdb.set_trace() - probL = np.array([[.0]*roi_per_img]) - ri += 1 - meta_prob_list.append((subj, exidx, 'L', cancerL, probL)) - - try: - probR, clustR = get_breast_prob_clust(exam['R']['img']) - for i,bow in enumerate(bow_list): - for ci in clustR[i]: - bow[ri, ci] += 1 - except KeyError: # unimaged breast. - probR = np.array([[.0]*roi_per_img]) - ri += 1 - meta_prob_list.append((subj, exidx, 'R', cancerR, probR)) - - return meta_prob_list, bow_list - - -def run(img_folder, dl_state, img_extension='dcm', - img_height=1024, img_scale=4095, val_size=.2, neg_vs_pos_ratio=10., - do_featurewise_norm=True, featurewise_mean=873.6, featurewise_std=739.3, - img_per_batch=2, roi_per_img=32, roi_size=(256, 256), - low_int_threshold=.05, blob_min_area=3, - blob_min_int=.5, blob_max_int=.85, blob_th_step=10, - layer_name=['flatten_1', 'dense_1'], layer_index=None, - roi_state=None, roi_clf_bs=32, - pc_components=.95, pc_whiten=True, - nb_words=[512], km_max_iter=100, km_bs=1000, km_patience=20, km_init=10, - exam_tsv='./metadata/exams_metadata.tsv', - img_tsv='./metadata/images_crosswalk.tsv', - pca_km_states='./modelState/dlrepr_pca_km_models.pkl', - bow_train_out='./modelState/bow_dat_train.pkl', - bow_test_out='./modelState/bow_dat_test.pkl'): - '''Calculate bag of deep visual words count matrix for all breasts - ''' - - # Read some env variables. - random_seed = int(os.getenv('RANDOM_SEED', 12345)) - rng = RandomState(random_seed) # an rng used across board. - - # Load and split image and label lists. 
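    # [Editor's note, not part of the original code] The split below is done
    # at the subject level rather than the image level, so images from the
    # same patient never land in both the train and test sets, and it is
    # stratified on the subject labels to preserve cancer prevalence in both
    # sets.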
- meta_man = DMMetaManager(exam_tsv=exam_tsv, - img_tsv=img_tsv, - img_folder=img_folder, - img_extension=img_extension) - subj_list, subj_labs = meta_man.get_subj_labs() - subj_train, subj_test, labs_train, labs_test = train_test_split( - subj_list, subj_labs, test_size=val_size, stratify=subj_labs, - random_state=random_seed) - if neg_vs_pos_ratio is not None: - def subset_subj(subj, labs): - subj = np.array(subj) - labs = np.array(labs) - pos_idx = np.where(labs==1)[0] - neg_idx = np.where(labs==0)[0] - nb_neg_desired = int(len(pos_idx)*neg_vs_pos_ratio) - if nb_neg_desired >= len(neg_idx): - return subj.tolist() - else: - neg_chosen = rng.choice(neg_idx, nb_neg_desired, replace=False) - subset_idx = np.concatenate([pos_idx, neg_chosen]) - return subj[subset_idx].tolist() - - subj_train = subset_subj(subj_train, labs_train) - subj_test = subset_subj(subj_test, labs_test) - - img_list, lab_list = meta_man.get_flatten_img_list(subj_train) - lab_list = np.array(lab_list) - print "Train set - Nb of positive images: %d, Nb of negative images: %d" \ - % ( (lab_list==1).sum(), (lab_list==0).sum()) - sys.stdout.flush() - - # Create image generator for ROIs for representation extraction. - print "Create an image generator for ROIs"; sys.stdout.flush() - if do_featurewise_norm: - imgen = DMImageDataGenerator( - featurewise_center=True, - featurewise_std_normalization=True) - imgen.mean = featurewise_mean - imgen.std = featurewise_std - else: - imgen = DMImageDataGenerator( - samplewise_center=True, - samplewise_std_normalization=True) - - # Load ROI classifier. - if roi_state is not None: - print "Load ROI classifier"; sys.stdout.flush() - roi_clf = load_model( - roi_state, - custom_objects={ - 'sensitivity': dmm.sensitivity, - 'specificity': dmm.specificity - } - ) - graph = tf.get_default_graph() - else: - roi_clf = None - graph = None - - # Create ROI generators for pos and neg images separately. - print "Create ROI generators for pos and neg images" - sys.stdout.flush() - roi_generator = imgen.flow_from_candid_roi( - img_list, target_height=img_height, target_scale=img_scale, - class_mode=None, validation_mode=True, - img_per_batch=img_per_batch, roi_per_img=roi_per_img, - roi_size=roi_size, - low_int_threshold=low_int_threshold, blob_min_area=blob_min_area, - blob_min_int=blob_min_int, blob_max_int=blob_max_int, - blob_th_step=blob_th_step, - tf_graph=graph, roi_clf=roi_clf, clf_bs=roi_clf_bs, - return_sample_weight=False, seed=random_seed) - - # Generate image patches and extract their DL representations. - print "Load DL representation model"; sys.stdout.flush() - dlrepr_model = DLRepr( - dl_state, - custom_objects={ - 'sensitivity': dmm.sensitivity, - 'specificity': dmm.specificity - }, - layer_name=layer_name, - layer_index=layer_index) - last_output_size = dlrepr_model.get_output_shape()[-1][-1] - if last_output_size != 3 and last_output_size != 1: - raise Exception("The last output must be prob outputs (size=3 or 1)") - - nb_tot_samples = len(img_list)*roi_per_img - print "Extract ROIs from pos and neg images"; sys.stdout.flush() - pred = dlrepr_model.predict_generator(roi_generator, - val_samples=nb_tot_samples) - for i,d in enumerate(pred): - print "Shape of representation/output data %d:" % (i), d.shape - sys.stdout.flush() - - # Flatten feature maps, e.g. an 8x8 feature map will become a 64-d vector. 
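    # [Editor's note, not part of the original code] After this reshape, every
    # row of the representation matrix is treated as an independent local
    # descriptor, playing the role SIFT descriptors play in a classical
    # bag-of-visual-words pipeline; the K-means codebook trained below
    # quantizes these rows into "deep visual words".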
- pred = [ d.reshape((-1,d.shape[-1])) for d in pred] - for i,d in enumerate(pred): - print "Shape of flattened data %d:" % (i), d.shape - sys.stdout.flush() - - # Split representations and prob outputs. - dl_repr = pred[0] - prob_out = pred[1] - if prob_out.shape[1] == 3: - prob_out = prob_out[:, 1] # pos class. - prob_out = prob_out.reshape((len(img_list),-1)) - print "Reshape prob output to:", prob_out.shape; sys.stdout.flush() - - # Use PCA to reduce dimension of the representation data. - if pc_components is not None: - print "Start PCA dimension reduction on DL representation" - sys.stdout.flush() - pca = PCA(n_components=pc_components, whiten=pc_whiten) - pca.fit(dl_repr) - print "Nb of PCA components:", pca.n_components_ - print "Total explained variance ratio: %.4f" % \ - (pca.explained_variance_ratio_.sum()) - dl_repr_pca = pca.transform(dl_repr) - print "Shape of transformed representation data:", dl_repr_pca.shape - sys.stdout.flush() - else: - pca = None - - # Use K-means to create a codebook for deep visual words. - print "Start K-means training on DL representation" - sys.stdout.flush() - clf_list = [] - clust_list = [] - # Shuffling indices for mini-batches learning. - perm_idx = rng.permutation(len(dl_repr)) - for n in nb_words: - print "Train K-means with %d cluster centers" % (n) - sys.stdout.flush() - clf = MiniBatchKMeans(n_clusters=n, init='k-means++', - max_iter=km_max_iter, batch_size=km_bs, - compute_labels=True, random_state=random_seed, - tol=0.0, max_no_improvement=km_patience, - init_size=None, n_init=km_init, - reassignment_ratio=0.01, verbose=0) - clf.fit(dl_repr[perm_idx]) - clf_list.append(clf) - clust = np.zeros_like(clf.labels_) - clust[perm_idx] = clf.labels_ - clust = clust.reshape((len(img_list),-1)) - clust_list.append(clust) - - if pca is not None: - print "Start K-means training on transformed representation" - sys.stdout.flush() - clf_list_pca = [] - clust_list_pca = [] - # Shuffling indices for mini-batches learning. - perm_idx = rng.permutation(len(dl_repr_pca)) - for n in nb_words: - print "Train K-means with %d cluster centers" % (n) - sys.stdout.flush() - clf = MiniBatchKMeans(n_clusters=n, init='k-means++', - max_iter=km_max_iter, batch_size=km_bs, - compute_labels=True, random_state=random_seed, - tol=0.0, max_no_improvement=km_patience, - init_size=None, n_init=km_init, - reassignment_ratio=0.01, verbose=0) - clf.fit(dl_repr_pca[perm_idx]) - clf_list_pca.append(clf) - clust = np.zeros_like(clf.labels_) - clust[perm_idx] = clf.labels_ - clust = clust.reshape((len(img_list),-1)) - clust_list_pca.append(clust) - - - # Read exam lists. - exam_train = meta_man.get_flatten_exam_list( - subj_train, flatten_img_list=True) - exam_test = meta_man.get_flatten_exam_list( - subj_test, flatten_img_list=True) - exam_labs_train = np.array(meta_man.exam_labs(exam_train)) - exam_labs_test = np.array(meta_man.exam_labs(exam_test)) - nb_pos_exams_train = (exam_labs_train==1).sum() - nb_neg_exams_train = (exam_labs_train==0).sum() - nb_pos_exams_test = (exam_labs_test==1).sum() - nb_neg_exams_test = (exam_labs_test==0).sum() - print "Train set - Nb of pos exams: %d, Nb of neg exams: %d" % \ - (nb_pos_exams_train, nb_neg_exams_train) - print "Test set - Nb of pos exams: %d, Nb of neg exams: %d" % \ - (nb_pos_exams_test, nb_neg_exams_test) - - # Do BoW counts for each breast. 
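    # [Editor's note, not part of the original code] Each breast becomes one
    # row of a sparse count matrix with nb_words columns: its ROI cluster
    # labels are tallied into a histogram of deep visual words. Two rows are
    # allocated per exam (left and right breast; an unimaged breast keeps an
    # all-zero row), and lil_matrix is used because it supports cheap
    # incremental writes.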
- print "BoW counting for train exam list"; sys.stdout.flush() - bow_dat_train = get_exam_bow_dat( - exam_train, nb_words, roi_per_img, - img_list=img_list, prob_out=prob_out, clust_list=clust_list) - for i,d in enumerate(bow_dat_train[1]): - print "Shape of train BoW matrix %d:" % (i), d.shape - sys.stdout.flush() - - print "BoW counting for test exam list"; sys.stdout.flush() - bow_dat_test = get_exam_bow_dat( - exam_test, nb_words, roi_per_img, - imgen=imgen, clf_list=clf_list, transformer=None, - target_height=img_height, target_scale=img_scale, - img_per_batch=img_per_batch, roi_size=roi_size, - low_int_threshold=low_int_threshold, blob_min_area=blob_min_area, - blob_min_int=blob_min_int, blob_max_int=blob_max_int, - blob_th_step=blob_th_step, seed=random_seed, - dlrepr_model=dlrepr_model) - for i,d in enumerate(bow_dat_test[1]): - print "Shape of test BoW matrix %d:" % (i), d.shape - sys.stdout.flush() - - if pca is not None: - print "== Do same BoW counting on PCA transformed data ==" - print "BoW counting for train exam list"; sys.stdout.flush() - bow_dat_train_pca = get_exam_bow_dat( - exam_train, nb_words, roi_per_img, - img_list=img_list, prob_out=prob_out, clust_list=clust_list_pca) - for i,d in enumerate(bow_dat_train_pca[1]): - print "Shape of train BoW matrix %d:" % (i), d.shape - sys.stdout.flush() - - print "BoW counting for test exam list"; sys.stdout.flush() - bow_dat_test_pca = get_exam_bow_dat( - exam_test, nb_words, roi_per_img, - imgen=imgen, clf_list=clf_list_pca, transformer=pca, - target_height=img_height, target_scale=img_scale, - img_per_batch=img_per_batch, roi_size=roi_size, - low_int_threshold=low_int_threshold, blob_min_area=blob_min_area, - blob_min_int=blob_min_int, blob_max_int=blob_max_int, - blob_th_step=blob_th_step, seed=random_seed, - dlrepr_model=dlrepr_model) - for i,d in enumerate(bow_dat_test_pca[1]): - print "Shape of test BoW matrix %d:" % (i), d.shape - sys.stdout.flush() - - - # Save K-means model and BoW count data. - if pca is None: - pickle.dump(clf_list, open(pca_km_states, 'w')) - pickle.dump(bow_dat_train, open(bow_train_out, 'w')) - pickle.dump(bow_dat_test, open(bow_test_out, 'w')) - else: - pickle.dump((pca, clf_list), open(pca_km_states, 'w')) - pickle.dump((bow_dat_train, bow_dat_train_pca), open(bow_train_out, 'w')) - pickle.dump((bow_dat_test, bow_dat_test_pca), open(bow_test_out, 'w')) - - print "Done." - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description="DM BoW training") - parser.add_argument("img_folder", type=str) - parser.add_argument("dl_state", type=str) - parser.add_argument("--img-extension", "-ext", dest="img_extension", type=str, default="dcm") - parser.add_argument("--img-height", "-ih", dest="img_height", type=int, default=1024) - parser.add_argument("--img-scale", "-ic", dest="img_scale", type=int, default=4095) - parser.add_argument("--val-size", "-vs", dest="val_size", type=float, default=.2) - parser.add_argument("--neg-vs-pos-ratio", dest="neg_vs_pos_ratio", type=float, default=10.) 
- parser.add_argument("--featurewise-norm", dest="do_featurewise_norm", action="store_true") - parser.add_argument("--no-featurewise-norm", dest="do_featurewise_norm", action="store_false") - parser.set_defaults(do_featurewise_norm=True) - parser.add_argument("--featurewise-mean", dest="featurewise_mean", type=float, default=873.6) - parser.add_argument("--featurewise-std", dest="featurewise_std", type=float, default=739.3) - parser.add_argument("--img-per-batch", "-ipb", dest="img_per_batch", type=int, default=2) - parser.add_argument("--roi-per-img", "-rpi", dest="roi_per_img", type=int, default=32) - parser.add_argument("--roi-size", dest="roi_size", nargs=2, type=int, default=[256, 256]) - parser.add_argument("--low-int-threshold", dest="low_int_threshold", type=float, default=.05) - parser.add_argument("--blob-min-area", dest="blob_min_area", type=int, default=3) - parser.add_argument("--blob-min-int", dest="blob_min_int", type=float, default=.5) - parser.add_argument("--blob-max-int", dest="blob_max_int", type=float, default=.85) - parser.add_argument("--blob-th-step", dest="blob_th_step", type=int, default=10) - parser.add_argument("--roi-state", dest="roi_state", type=str, default=None) - parser.add_argument("--no-roi-state", dest="roi_state", action="store_const", const=None) - parser.add_argument("--roi-clf-bs", dest="roi_clf_bs", type=int, default=32) - parser.add_argument("--pc-components", dest="pc_components", type=float, default=.95) - parser.add_argument("--no-pc-components", dest="pc_components", - action="store_const", const=None) - parser.add_argument("--pc-whiten", dest="pc_whiten", action="store_true") - parser.add_argument("--no-pc-whiten", dest="pc_whiten", action="store_false") - parser.set_defaults(pc_whiten=True) - parser.add_argument("--layer-name", dest="layer_name", nargs=2, type=str, - default=["flatten_1", "dense_1"]) - parser.add_argument("--layer-index", dest="layer_index", nargs=2, type=int, default=None) - parser.add_argument("--nb-words", dest="nb_words", nargs="+", type=int, default=[512]) - parser.add_argument("--km-max-iter", dest="km_max_iter", type=int, default=100) - parser.add_argument("--km-bs", dest="km_bs", type=int, default=1000) - parser.add_argument("--km-patience", dest="km_patience", type=int, default=20) - parser.add_argument("--km-init", dest="km_init", type=int, default=10) - parser.add_argument("--exam-tsv", "-et", dest="exam_tsv", type=str, - default="./metadata/exams_metadata.tsv") - parser.add_argument("--no-exam-tsv", dest="exam_tsv", action="store_const", const=None) - parser.add_argument("--img-tsv", "-it", dest="img_tsv", type=str, - default="./metadata/images_crosswalk.tsv") - parser.add_argument("--pca-km-states", dest="pca_km_states", type=str, - default="./modelState/dlrepr_pca_km_models.pkl") - parser.add_argument("--bow-train-out", dest="bow_train_out", type=str, - default="./modelState/bow_dat_train.pkl") - parser.add_argument("--bow-test-out", dest="bow_test_out", type=str, - default="./modelState/bow_dat_test.pkl") - - args = parser.parse_args() - run_opts = dict( - img_extension=args.img_extension, - img_height=args.img_height, - img_scale=args.img_scale, - val_size=args.val_size if args.val_size < 1 else int(args.val_size), - neg_vs_pos_ratio=args.neg_vs_pos_ratio, - do_featurewise_norm=args.do_featurewise_norm, - featurewise_mean=args.featurewise_mean, - featurewise_std=args.featurewise_std, - img_per_batch=args.img_per_batch, - roi_per_img=args.roi_per_img, - roi_size=tuple(args.roi_size), - 
low_int_threshold=args.low_int_threshold, - blob_min_area=args.blob_min_area, - blob_min_int=args.blob_min_int, - blob_max_int=args.blob_max_int, - blob_th_step=args.blob_th_step, - roi_state=args.roi_state, - roi_clf_bs=args.roi_clf_bs, - pc_components=(args.pc_components if args.pc_components < 1. - else int(args.pc_components)), - pc_whiten=args.pc_whiten, - layer_name=args.layer_name, - layer_index=args.layer_index, - nb_words=args.nb_words, - km_max_iter=args.km_max_iter, - km_bs=args.km_bs, - km_patience=args.km_patience, - km_init=args.km_init, - exam_tsv=args.exam_tsv, - img_tsv=args.img_tsv, - pca_km_states=args.pca_km_states, - bow_train_out=args.bow_train_out, - bow_test_out=args.bow_test_out - ) - print "\n>>> Model training options: <<<\n", run_opts, "\n" - run(args.img_folder, args.dl_state, **run_opts) - - - - - - - - - - - - - - - diff --git a/training/dm_candidROI_score.py b/training/dm_candidROI_score.py deleted file mode 100644 index 5719dad..0000000 --- a/training/dm_candidROI_score.py +++ /dev/null @@ -1,264 +0,0 @@ -import argparse -import os, sys -import pickle -import numpy as np -from numpy.random import RandomState -from sklearn.model_selection import train_test_split -from keras.models import load_model -from meta import DMMetaManager -from dm_image import DMImageDataGenerator -from dm_keras_ext import DMMetrics as dmm -from dm_multi_gpu import make_parallel - -import warnings -import exceptions -warnings.filterwarnings('ignore', category=exceptions.UserWarning) - - -def get_exam_pred(exam_list, roi_per_img, imgen, **kw_args): - '''Get the predictions for an exam list - ''' - ##################################################### - def get_breast_prob( - case_all_imgs, target_height, target_scale, - img_per_batch, roi_size, - low_int_threshold, blob_min_area, blob_min_int, blob_max_int, - blob_th_step, seed, dl_model): - '''Get prob for all ROIs for all images of a case - ''' - roi_generator = imgen.flow_from_candid_roi( - case_all_imgs, - target_height=target_height, target_scale=target_scale, - class_mode=None, validation_mode=True, - img_per_batch=img_per_batch, roi_per_img=roi_per_img, - roi_size=roi_size, - low_int_threshold=low_int_threshold, blob_min_area=blob_min_area, - blob_min_int=blob_min_int, blob_max_int=blob_max_int, - blob_th_step=blob_th_step, - roi_clf=None, return_sample_weight=False, seed=seed) - # import pdb; pdb.set_trace() - pred = dl_model.predict_generator( - roi_generator, val_samples=roi_per_img*len(case_all_imgs)) - # New shape: img x roi x output. - pred = pred.reshape((len(case_all_imgs), roi_per_img, -1)) - return pred - ##################################################### - - meta_prob_list = [] - for subj, exidx, exam in exam_list: - try: - cancerL = int(exam['L']['cancer']) - except ValueError: - cancerL = 0 - try: - cancerR = int(exam['R']['cancer']) - except ValueError: - cancerR = 0 - - try: - probL = get_breast_prob(exam['L']['img'], **kw_args) - except KeyError: # unimaged breast. - probL = np.array([[[1.,0.,0.]]*roi_per_img]) - meta_prob_list.append((subj, exidx, 'L', cancerL, probL)) - - try: - probR = get_breast_prob(exam['R']['img'], **kw_args) - except KeyError: # unimaged breast. 
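            # [Editor's note, not part of the original code] An unimaged
            # breast gets a single dummy image of roi_per_img ROIs whose
            # 3-class output is [1., 0., 0.], i.e. zero probability on the
            # cancer class (index 1), so downstream aggregation can treat
            # every breast uniformly.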
-            probR = np.array([[[1.,0.,0.]]*roi_per_img])
-        meta_prob_list.append((subj, exidx, 'R', cancerR, probR))
-
-    return meta_prob_list
-
-
-def run(img_folder, dl_state, img_extension='dcm',
-        img_height=1024, img_scale=4095, val_size=.2, neg_vs_pos_ratio=10.,
-        do_featurewise_norm=True, featurewise_mean=873.6, featurewise_std=739.3,
-        img_per_batch=2, roi_per_img=32, roi_size=(256, 256),
-        low_int_threshold=.05, blob_min_area=3,
-        blob_min_int=.5, blob_max_int=.85, blob_th_step=10,
-        exam_tsv='./metadata/exams_metadata.tsv',
-        img_tsv='./metadata/images_crosswalk.tsv',
-        train_out='./modelState/meta_prob_train.pkl',
-        test_out='./modelState/meta_prob_test.pkl'):
-    '''Score candidate ROIs with a DL model and collect breast-level predictions
-    '''
-
-    # Read some env variables.
-    random_seed = int(os.getenv('RANDOM_SEED', 12345))
-    rng = RandomState(random_seed)  # an rng used across the board.
-    gpu_count = int(os.getenv('NUM_GPU_DEVICES', 1))
-
-    # Load and split image and label lists.
-    meta_man = DMMetaManager(exam_tsv=exam_tsv,
-                             img_tsv=img_tsv,
-                             img_folder=img_folder,
-                             img_extension=img_extension)
-    subj_list, subj_labs = meta_man.get_subj_labs()
-    subj_train, subj_test, labs_train, labs_test = train_test_split(
-        subj_list, subj_labs, test_size=val_size, stratify=subj_labs,
-        random_state=random_seed)
-    if neg_vs_pos_ratio is not None:
-        def subset_subj(subj, labs):
-            subj = np.array(subj)
-            labs = np.array(labs)
-            pos_idx = np.where(labs==1)[0]
-            neg_idx = np.where(labs==0)[0]
-            nb_neg_desired = int(len(pos_idx)*neg_vs_pos_ratio)
-            if nb_neg_desired >= len(neg_idx):
-                return subj.tolist()
-            else:
-                neg_chosen = rng.choice(neg_idx, nb_neg_desired, replace=False)
-                subset_idx = np.concatenate([pos_idx, neg_chosen])
-                return subj[subset_idx].tolist()
-
-        subj_train = subset_subj(subj_train, labs_train)
-        subj_test = subset_subj(subj_test, labs_test)
-
-    # Create image generator for ROIs for representation extraction.
-    print "Create an image generator for ROIs"; sys.stdout.flush()
-    if do_featurewise_norm:
-        imgen = DMImageDataGenerator(
-            featurewise_center=True,
-            featurewise_std_normalization=True)
-        imgen.mean = featurewise_mean
-        imgen.std = featurewise_std
-    else:
-        imgen = DMImageDataGenerator(
-            samplewise_center=True,
-            samplewise_std_normalization=True)
-
-    # Load DL model.
-    print "Load DL classification model:", dl_state; sys.stdout.flush()
-    dl_model = load_model(
-        dl_state,
-        custom_objects={
-            'sensitivity': dmm.sensitivity,
-            'specificity': dmm.specificity
-        }
-    )
-    if gpu_count > 1:
-        print "Make the model parallel on %d GPUs" % (gpu_count)
-        sys.stdout.flush()
-        dl_model = make_parallel(dl_model, gpu_count)
-
-    # Read exam lists.
-    exam_train = meta_man.get_flatten_exam_list(
-        subj_train, flatten_img_list=True)
-    exam_test = meta_man.get_flatten_exam_list(
-        subj_test, flatten_img_list=True)
-    exam_labs_train = np.array(meta_man.exam_labs(exam_train))
-    exam_labs_test = np.array(meta_man.exam_labs(exam_test))
-    nb_pos_exams_train = (exam_labs_train==1).sum()
-    nb_neg_exams_train = (exam_labs_train==0).sum()
-    nb_pos_exams_test = (exam_labs_test==1).sum()
-    nb_neg_exams_test = (exam_labs_test==0).sum()
-    print "Train set - Nb of pos exams: %d, Nb of neg exams: %d" % \
-        (nb_pos_exams_train, nb_neg_exams_train)
-    print "Test set - Nb of pos exams: %d, Nb of neg exams: %d" % \
-        (nb_pos_exams_test, nb_neg_exams_test)
-
-    # Make predictions for exam lists.
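    # [Editor's note, not part of the original code] get_exam_pred returns one
    # record per breast: (subject, exam_index, laterality, cancer_label, prob),
    # where prob has shape (nb_images, roi_per_img, nb_outputs). The pickled
    # records can then feed a breast-level meta-classifier.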
- print "Predicting for train exam list"; sys.stdout.flush() - meta_prob_train = get_exam_pred( - exam_train, roi_per_img, imgen, - target_height=img_height, target_scale=img_scale, - img_per_batch=img_per_batch, roi_size=roi_size, - low_int_threshold=low_int_threshold, blob_min_area=blob_min_area, - blob_min_int=blob_min_int, blob_max_int=blob_max_int, - blob_th_step=blob_th_step, seed=random_seed, - dl_model=dl_model) - print "Length of train prediction list:", len(meta_prob_train) - sys.stdout.flush() - - print "Predicting for test exam list"; sys.stdout.flush() - meta_prob_test = get_exam_pred( - exam_test, roi_per_img, imgen, - target_height=img_height, target_scale=img_scale, - img_per_batch=img_per_batch, roi_size=roi_size, - low_int_threshold=low_int_threshold, blob_min_area=blob_min_area, - blob_min_int=blob_min_int, blob_max_int=blob_max_int, - blob_th_step=blob_th_step, seed=random_seed, - dl_model=dl_model) - print "Length of test prediction list:", len(meta_prob_test) - sys.stdout.flush() - - pickle.dump(meta_prob_train, open(train_out, 'w')) - pickle.dump(meta_prob_test, open(test_out, 'w')) - print "Done." - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description="DM candidROI prediction") - parser.add_argument("img_folder", type=str) - parser.add_argument("dl_state", type=str) - parser.add_argument("--img-extension", "-ext", dest="img_extension", type=str, default="dcm") - parser.add_argument("--img-height", "-ih", dest="img_height", type=int, default=1024) - parser.add_argument("--img-scale", "-ic", dest="img_scale", type=int, default=4095) - parser.add_argument("--val-size", "-vs", dest="val_size", type=float, default=.2) - parser.add_argument("--neg-vs-pos-ratio", dest="neg_vs_pos_ratio", type=float, default=10.) 
- parser.add_argument("--no-neg-vs-pos-ratio", dest="neg_vs_pos_ratio", - action="store_const", const=None) - parser.add_argument("--featurewise-norm", dest="do_featurewise_norm", action="store_true") - parser.add_argument("--no-featurewise-norm", dest="do_featurewise_norm", action="store_false") - parser.set_defaults(do_featurewise_norm=True) - parser.add_argument("--featurewise-mean", dest="featurewise_mean", type=float, default=873.6) - parser.add_argument("--featurewise-std", dest="featurewise_std", type=float, default=739.3) - parser.add_argument("--img-per-batch", "-ipb", dest="img_per_batch", type=int, default=2) - parser.add_argument("--roi-per-img", "-rpi", dest="roi_per_img", type=int, default=32) - parser.add_argument("--roi-size", dest="roi_size", nargs=2, type=int, default=[256, 256]) - parser.add_argument("--low-int-threshold", dest="low_int_threshold", type=float, default=.05) - parser.add_argument("--blob-min-area", dest="blob_min_area", type=int, default=3) - parser.add_argument("--blob-min-int", dest="blob_min_int", type=float, default=.5) - parser.add_argument("--blob-max-int", dest="blob_max_int", type=float, default=.85) - parser.add_argument("--blob-th-step", dest="blob_th_step", type=int, default=10) - parser.add_argument("--exam-tsv", "-et", dest="exam_tsv", type=str, - default="./metadata/exams_metadata.tsv") - parser.add_argument("--no-exam-tsv", dest="exam_tsv", action="store_const", const=None) - parser.add_argument("--img-tsv", "-it", dest="img_tsv", type=str, - default="./metadata/images_crosswalk.tsv") - parser.add_argument("--train-out", dest="train_out", type=str, - default="./modelState/meta_prob_train.pkl") - parser.add_argument("--test-out", dest="test_out", type=str, - default="./modelState/meta_prob_test.pkl") - - args = parser.parse_args() - run_opts = dict( - img_extension=args.img_extension, - img_height=args.img_height, - img_scale=args.img_scale, - val_size=args.val_size if args.val_size < 1 else int(args.val_size), - neg_vs_pos_ratio=args.neg_vs_pos_ratio, - do_featurewise_norm=args.do_featurewise_norm, - featurewise_mean=args.featurewise_mean, - featurewise_std=args.featurewise_std, - img_per_batch=args.img_per_batch, - roi_per_img=args.roi_per_img, - roi_size=tuple(args.roi_size), - low_int_threshold=args.low_int_threshold, - blob_min_area=args.blob_min_area, - blob_min_int=args.blob_min_int, - blob_max_int=args.blob_max_int, - blob_th_step=args.blob_th_step, - exam_tsv=args.exam_tsv, - img_tsv=args.img_tsv, - train_out=args.train_out, - test_out=args.test_out - ) - print "\n>>> Model training options: <<<\n", run_opts, "\n" - run(args.img_folder, args.dl_state, **run_opts) - - - - - - - - - - - - - - - diff --git a/training/dm_candidROI_train.py b/training/dm_candidROI_train.py deleted file mode 100644 index 3c0fed3..0000000 --- a/training/dm_candidROI_train.py +++ /dev/null @@ -1,533 +0,0 @@ -import os, argparse, sys, pickle -import numpy as np -from sklearn.model_selection import train_test_split -from keras.callbacks import ( - ReduceLROnPlateau, - EarlyStopping, -) -from keras.optimizers import SGD -from keras.models import load_model -import tensorflow as tf -from meta import DMMetaManager -from dm_image import DMImageDataGenerator, to_sparse -from dm_resnet import ResNetBuilder -from dm_multi_gpu import make_parallel -from dm_keras_ext import DMMetrics, DMAucModelCheckpoint, load_dat_ram -from dm_candidROI_score import get_exam_pred -import warnings -import exceptions -warnings.filterwarnings('ignore', category=exceptions.UserWarning) - 
- -def run(img_folder, img_extension='dcm', - img_height=1024, img_scale=4095, - do_featurewise_norm=True, norm_fit_size=10, - img_per_batch=2, roi_per_img=32, roi_size=(256, 256), - one_patch_mode=False, - low_int_threshold=.05, blob_min_area=3, - blob_min_int=.5, blob_max_int=.85, blob_th_step=10, - data_augmentation=False, roi_state=None, clf_bs=32, cutpoint=.5, - amp_factor=1., return_sample_weight=True, auto_batch_balance=True, - patches_per_epoch=12800, nb_epoch=20, - neg_vs_pos_ratio=None, all_neg_skip=0., - nb_init_filter=32, init_filter_size=5, init_conv_stride=2, - pool_size=2, pool_stride=2, - weight_decay=.0001, alpha=.0001, l1_ratio=.0, - inp_dropout=.0, hidden_dropout=.0, init_lr=.01, - test_size=.2, val_size=.0, - lr_patience=3, es_patience=10, - resume_from=None, net='resnet50', load_val_ram=False, - load_train_ram=False, no_pos_skip=0., balance_classes=0., - pred_img_per_batch=1, pred_roi_per_img=32, - exam_tsv='./metadata/exams_metadata.tsv', - img_tsv='./metadata/images_crosswalk.tsv', - best_model='./modelState/dm_candidROI_best_model.h5', - final_model="NOSAVE", - pred_trainval=False, pred_out="dl_pred_out.pkl"): - '''Run ResNet training on candidate ROIs from mammograms - Args: - norm_fit_size ([int]): the number of patients used to calculate - feature-wise mean and std. - ''' - - # Read some env variables. - random_seed = int(os.getenv('RANDOM_SEED', 12345)) - # Use of multiple CPU cores is not working! - # When nb_worker>1 and pickle_safe=True, this error is encountered: - # "failed to enqueue async memcpy from host to device: CUDA_ERROR_NOT_INITIALIZED" - # To avoid the error, only this combination worked: - # nb_worker=1 and pickle_safe=False. - nb_worker = int(os.getenv('NUM_CPU_CORES', 4)) - gpu_count = int(os.getenv('NUM_GPU_DEVICES', 1)) - - # Setup training and validation data. - # Load image or exam lists and split them into train and val sets. - meta_man = DMMetaManager(exam_tsv=exam_tsv, - img_tsv=img_tsv, - img_folder=img_folder, - img_extension=img_extension) - # Split data based on subjects. - subj_list, subj_labs = meta_man.get_subj_labs() - subj_train, subj_test, slab_train, slab_test = train_test_split( - subj_list, subj_labs, test_size=test_size, random_state=random_seed, - stratify=subj_labs) - if val_size > 0: # train/val split. - subj_train, subj_val, slab_train, slab_val = train_test_split( - subj_train, slab_train, test_size=val_size, - random_state=random_seed, stratify=slab_train) - else: # use test as val. make a copy of the test list. - subj_val = list(subj_test) - slab_val = list(slab_test) - # import pdb; pdb.set_trace() - # Subset subject lists to desired ratio. - if neg_vs_pos_ratio is not None: - subj_train, slab_train = DMMetaManager.subset_subj_list( - subj_train, slab_train, neg_vs_pos_ratio, random_seed) - subj_val, slab_val = DMMetaManager.subset_subj_list( - subj_val, slab_val, neg_vs_pos_ratio, random_seed) - print "After sampling, Nb of subjects for train=%d, val=%d, test=%d" \ - % (len(subj_train), len(subj_val), len(subj_test)) - # Get image and label lists. - img_train, lab_train = meta_man.get_flatten_img_list(subj_train) - img_val, lab_val = meta_man.get_flatten_img_list(subj_val) - - # Create image generators for train, fit and val. - imgen_trainval = DMImageDataGenerator() - if data_augmentation: - imgen_trainval.horizontal_flip=True - imgen_trainval.vertical_flip=True - imgen_trainval.rotation_range = 45. - imgen_trainval.shear_range = np.pi/8. 
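        # [Editor's note, not part of the original code] rotation_range is
        # given in degrees, while shear_range in this Keras generation is a
        # shear intensity in radians -- hence np.pi/8 (22.5 degrees) above.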
- # imgen_trainval.width_shift_range = .05 - # imgen_trainval.height_shift_range = .05 - # imgen_trainval.zoom_range = [.95, 1.05] - - if do_featurewise_norm: - imgen_trainval.featurewise_center = True - imgen_trainval.featurewise_std_normalization = True - # Fit feature-wise mean and std. - img_fit,_ = meta_man.get_flatten_img_list( - subj_train[:norm_fit_size]) # fit on a subset. - print ">>> Fit image generator <<<"; sys.stdout.flush() - fit_generator = imgen_trainval.flow_from_candid_roi( - img_fit, - target_height=img_height, target_scale=img_scale, - class_mode=None, validation_mode=True, - img_per_batch=len(img_fit), roi_per_img=roi_per_img, - roi_size=roi_size, - low_int_threshold=low_int_threshold, blob_min_area=blob_min_area, - blob_min_int=blob_min_int, blob_max_int=blob_max_int, - blob_th_step=blob_th_step, - roi_clf=None, return_sample_weight=False, seed=random_seed) - imgen_trainval.fit(fit_generator.next()) - print "Estimates from %d images: mean=%.1f, std=%.1f." % \ - (len(img_fit), imgen_trainval.mean, imgen_trainval.std) - sys.stdout.flush() - else: - imgen_trainval.samplewise_center = True - imgen_trainval.samplewise_std_normalization = True - - # Load ROI classifier. - if roi_state is not None: - roi_clf = load_model( - roi_state, - custom_objects={ - 'sensitivity': DMMetrics.sensitivity, - 'specificity': DMMetrics.specificity - } - ) - graph = tf.get_default_graph() - else: - roi_clf = None - graph = None - - # Set some DL training related parameters. - if one_patch_mode: - class_mode = 'binary' - loss = 'binary_crossentropy' - metrics = [DMMetrics.sensitivity, DMMetrics.specificity] - else: - class_mode = 'categorical' - loss = 'categorical_crossentropy' - metrics = ['accuracy', 'precision', 'recall'] - if load_train_ram: - validation_mode = True - return_raw_img = True - else: - validation_mode = False - return_raw_img = False - - # Create train and val generators. - print ">>> Train image generator <<<"; sys.stdout.flush() - train_generator = imgen_trainval.flow_from_candid_roi( - img_train, lab_train, - target_height=img_height, target_scale=img_scale, - class_mode=class_mode, validation_mode=validation_mode, - img_per_batch=img_per_batch, roi_per_img=roi_per_img, - roi_size=roi_size, one_patch_mode=one_patch_mode, - low_int_threshold=low_int_threshold, blob_min_area=blob_min_area, - blob_min_int=blob_min_int, blob_max_int=blob_max_int, - blob_th_step=blob_th_step, - tf_graph=graph, roi_clf=roi_clf, clf_bs=clf_bs, cutpoint=cutpoint, - amp_factor=amp_factor, return_sample_weight=return_sample_weight, - auto_batch_balance=auto_batch_balance, - all_neg_skip=all_neg_skip, shuffle=True, seed=random_seed, - return_raw_img=return_raw_img) - - print ">>> Validation image generator <<<"; sys.stdout.flush() - val_generator = imgen_trainval.flow_from_candid_roi( - img_val, lab_val, - target_height=img_height, target_scale=img_scale, - class_mode=class_mode, validation_mode=True, - img_per_batch=img_per_batch, roi_per_img=roi_per_img, - roi_size=roi_size, one_patch_mode=one_patch_mode, - low_int_threshold=low_int_threshold, blob_min_area=blob_min_area, - blob_min_int=blob_min_int, blob_max_int=blob_max_int, - blob_th_step=blob_th_step, - tf_graph=graph, roi_clf=roi_clf, clf_bs=clf_bs, cutpoint=cutpoint, - amp_factor=amp_factor, return_sample_weight=False, - auto_batch_balance=False, - seed=random_seed) - - # Load train and validation set into RAM. 
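    # [Editor's note, not part of the original code] Materializing the
    # generated patches in RAM serves two purposes here: the AUC checkpointer
    # can rescore a fixed validation set every epoch, and (per the worker
    # comment near the top of run) multi-worker generator threading is only
    # enabled further down when the training data are already in memory.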
- if one_patch_mode: - nb_train_samples = len(img_train) - nb_val_samples = len(img_val) - else: - nb_train_samples = len(img_train)*roi_per_img - nb_val_samples = len(img_val)*roi_per_img - if load_val_ram: - print "Loading validation data into RAM.", - sys.stdout.flush() - validation_set = load_dat_ram(val_generator, nb_val_samples) - print "Done."; sys.stdout.flush() - sparse_y = to_sparse(validation_set[1]) - for uy in np.unique(sparse_y): - print "Nb of samples for class:%d = %d" % \ - (uy, (sparse_y==uy).sum()) - sys.stdout.flush() - if load_train_ram: - print "Loading train data into RAM.", - sys.stdout.flush() - train_set = load_dat_ram(train_generator, nb_train_samples) - print "Done."; sys.stdout.flush() - sparse_y = to_sparse(train_set[1]) - for uy in np.unique(sparse_y): - print "Nb of samples for class:%d = %d" % \ - (uy, (sparse_y==uy).sum()) - sys.stdout.flush() - train_generator = imgen_trainval.flow( - train_set[0], train_set[1], batch_size=clf_bs, - auto_batch_balance=auto_batch_balance, no_pos_skip=no_pos_skip, - balance_classes=balance_classes, shuffle=True, seed=random_seed) - - # Load or create model. - if resume_from is not None: - model = load_model( - resume_from, - custom_objects={ - 'sensitivity': DMMetrics.sensitivity, - 'specificity': DMMetrics.specificity - } - ) - else: - builder = ResNetBuilder - if net == 'resnet18': - model = builder.build_resnet_18( - (1, roi_size[0], roi_size[1]), 3, nb_init_filter, init_filter_size, - init_conv_stride, pool_size, pool_stride, weight_decay, alpha, l1_ratio, - inp_dropout, hidden_dropout) - elif net == 'resnet34': - model = builder.build_resnet_34( - (1, roi_size[0], roi_size[1]), 3, nb_init_filter, init_filter_size, - init_conv_stride, pool_size, pool_stride, weight_decay, alpha, l1_ratio, - inp_dropout, hidden_dropout) - elif net == 'resnet50': - model = builder.build_resnet_50( - (1, roi_size[0], roi_size[1]), 3, nb_init_filter, init_filter_size, - init_conv_stride, pool_size, pool_stride, weight_decay, alpha, l1_ratio, - inp_dropout, hidden_dropout) - elif net == 'resnet101': - model = builder.build_resnet_101( - (1, roi_size[0], roi_size[1]), 3, nb_init_filter, init_filter_size, - init_conv_stride, pool_size, pool_stride, weight_decay, alpha, l1_ratio, - inp_dropout, hidden_dropout) - elif net == 'resnet152': - model = builder.build_resnet_152( - (1, roi_size[0], roi_size[1]), 3, nb_init_filter, init_filter_size, - init_conv_stride, pool_size, pool_stride, weight_decay, alpha, l1_ratio, - inp_dropout, hidden_dropout) - - if gpu_count > 1: - model = make_parallel(model, gpu_count) - - # Model training. 
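    # [Editor's note, not part of the original code] samples_per_epoch is set
    # to patches_per_epoch below, so an "epoch" is a fixed budget of ROI
    # patches rather than one pass over the data: the candidate-ROI generator
    # is effectively unbounded, and this decouples the learning-rate schedule
    # and early stopping from the dataset size.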
- sgd = SGD(lr=init_lr, momentum=0.9, decay=0.0, nesterov=True) - model.compile(optimizer=sgd, loss=loss, metrics=metrics) - reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, - patience=lr_patience, verbose=1) - early_stopping = EarlyStopping(monitor='val_loss', patience=es_patience, - verbose=1) - if load_val_ram: - auc_checkpointer = DMAucModelCheckpoint( - best_model, validation_set, batch_size=clf_bs) - else: - auc_checkpointer = DMAucModelCheckpoint( - best_model, val_generator, nb_test_samples=nb_val_samples) - hist = model.fit_generator( - train_generator, - samples_per_epoch=patches_per_epoch, - nb_epoch=nb_epoch, - validation_data=validation_set if load_val_ram else val_generator, - nb_val_samples=nb_val_samples, - callbacks=[reduce_lr, early_stopping, auc_checkpointer], - # nb_worker=1, pickle_safe=False, - nb_worker=nb_worker if load_train_ram else 1, - pickle_safe=True if load_train_ram else False, - verbose=2) - - if final_model != "NOSAVE": - print "Saving final model to:", final_model; sys.stdout.flush() - model.save(final_model) - - # Training report. - min_loss_locs, = np.where(hist.history['val_loss'] == min(hist.history['val_loss'])) - best_val_loss = hist.history['val_loss'][min_loss_locs[0]] - if one_patch_mode: - best_val_sensitivity = hist.history['val_sensitivity'][min_loss_locs[0]] - best_val_specificity = hist.history['val_specificity'][min_loss_locs[0]] - else: - best_val_precision = hist.history['val_precision'][min_loss_locs[0]] - best_val_recall = hist.history['val_recall'][min_loss_locs[0]] - best_val_accuracy = hist.history['val_acc'][min_loss_locs[0]] - print "\n==== Training summary ====" - print "Minimum val loss achieved at epoch:", min_loss_locs[0] + 1 - print "Best val loss:", best_val_loss - if one_patch_mode: - print "Best val sensitivity:", best_val_sensitivity - print "Best val specificity:", best_val_specificity - else: - print "Best val precision:", best_val_precision - print "Best val recall:", best_val_recall - print "Best val accuracy:", best_val_accuracy - - # Make predictions on train, val, test exam lists. 
- if best_model != 'NOSAVE':
- print "\n==== Making predictions ===="
- print "Load best model for prediction:", best_model
- sys.stdout.flush()
- pred_model = load_model(best_model)
- if gpu_count > 1:
- pred_model = make_parallel(pred_model, gpu_count)
-
- if pred_trainval:
- print "Load exam lists for train, val sets"; sys.stdout.flush()
- exam_train = meta_man.get_flatten_exam_list(
- subj_train, flatten_img_list=True)
- print "Train exam list length=", len(exam_train); sys.stdout.flush()
- exam_val = meta_man.get_flatten_exam_list(
- subj_val, flatten_img_list=True)
- print "Val exam list length=", len(exam_val); sys.stdout.flush()
- print "Load exam list for test set"; sys.stdout.flush()
- exam_test = meta_man.get_flatten_exam_list(
- subj_test, flatten_img_list=True)
- print "Test exam list length=", len(exam_test); sys.stdout.flush()
-
- # Instantiate the generator up front so both normalization branches can
- # configure it; otherwise imgen_pred is undefined in the samplewise branch.
- imgen_pred = DMImageDataGenerator()
- if do_featurewise_norm:
- imgen_pred.featurewise_center = True
- imgen_pred.featurewise_std_normalization = True
- imgen_pred.mean = imgen_trainval.mean
- imgen_pred.std = imgen_trainval.std
- else:
- imgen_pred.samplewise_center = True
- imgen_pred.samplewise_std_normalization = True
-
- if pred_trainval:
- print "Make predictions on train exam list"; sys.stdout.flush()
- meta_prob_train = get_exam_pred(
- exam_train, pred_roi_per_img, imgen_pred,
- target_height=img_height, target_scale=img_scale,
- img_per_batch=pred_img_per_batch, roi_size=roi_size,
- low_int_threshold=low_int_threshold, blob_min_area=blob_min_area,
- blob_min_int=blob_min_int, blob_max_int=blob_max_int,
- blob_th_step=blob_th_step, seed=random_seed,
- dl_model=pred_model)
- print "Train prediction list length=", len(meta_prob_train)
-
- print "Make predictions on val exam list"; sys.stdout.flush()
- meta_prob_val = get_exam_pred(
- exam_val, pred_roi_per_img, imgen_pred,
- target_height=img_height, target_scale=img_scale,
- img_per_batch=pred_img_per_batch, roi_size=roi_size,
- low_int_threshold=low_int_threshold, blob_min_area=blob_min_area,
- blob_min_int=blob_min_int, blob_max_int=blob_max_int,
- blob_th_step=blob_th_step, seed=random_seed,
- dl_model=pred_model)
- print "Val prediction list length=", len(meta_prob_val)
-
- print "Make predictions on test exam list"; sys.stdout.flush()
- meta_prob_test = get_exam_pred(
- exam_test, pred_roi_per_img, imgen_pred,
- target_height=img_height, target_scale=img_scale,
- img_per_batch=pred_img_per_batch, roi_size=roi_size,
- low_int_threshold=low_int_threshold, blob_min_area=blob_min_area,
- blob_min_int=blob_min_int, blob_max_int=blob_max_int,
- blob_th_step=blob_th_step, seed=random_seed,
- dl_model=pred_model)
- print "Test prediction list length=", len(meta_prob_test)
-
- if pred_trainval:
- pickle.dump((meta_prob_train, meta_prob_val, meta_prob_test),
- open(pred_out, 'w'))
- else:
- pickle.dump(meta_prob_test, open(pred_out, 'w'))
-
- return hist
-
-
-if __name__ == '__main__':
-
- parser = argparse.ArgumentParser(description="DM Candid ROI training")
- parser.add_argument("img_folder", type=str)
- parser.add_argument("--img-extension", "-ext", dest="img_extension", type=str, default="dcm")
- parser.add_argument("--img-height", "-ih", dest="img_height", type=int, default=1024)
- parser.add_argument("--img-scale", "-ic", dest="img_scale", type=int, default=4095)
- parser.add_argument("--featurewise-norm", dest="do_featurewise_norm", action="store_true")
- parser.add_argument("--no-featurewise-norm", dest="do_featurewise_norm", action="store_false")
-
parser.set_defaults(do_featurewise_norm=True) - parser.add_argument("--norm-fit-size", "-nfs", dest="norm_fit_size", type=int, default=10) - parser.add_argument("--img-per-batch", "-ipb", dest="img_per_batch", type=int, default=2) - parser.add_argument("--pred-img-per-batch", dest="pred_img_per_batch", type=int, default=1) - parser.add_argument("--roi-per-img", "-rpi", dest="roi_per_img", type=int, default=32) - parser.add_argument("--pred-roi-per-img", dest="pred_roi_per_img", type=int, default=32) - parser.add_argument("--roi-size", dest="roi_size", nargs=2, type=int, default=[256, 256]) - parser.add_argument("--one-patch-mode", dest="one_patch_mode", action="store_true") - parser.add_argument("--no-one-patch-mode", dest="one_patch_mode", action="store_false") - parser.set_defaults(one_patch_mode=False) - parser.add_argument("--low-int-threshold", dest="low_int_threshold", type=float, default=.05) - parser.add_argument("--blob-min-area", dest="blob_min_area", type=int, default=3) - parser.add_argument("--blob-min-int", dest="blob_min_int", type=float, default=.5) - parser.add_argument("--blob-max-int", dest="blob_max_int", type=float, default=.85) - parser.add_argument("--blob-th-step", dest="blob_th_step", type=int, default=10) - parser.add_argument("--data-augmentation", dest="data_augmentation", action="store_true") - parser.add_argument("--no-data-augmentation", dest="data_augmentation", action="store_false") - parser.set_defaults(data_augmentation=False) - parser.add_argument("--roi-state", dest="roi_state", type=str, default=None) - parser.add_argument("--no-roi-state", dest="roi_state", action="store_const", const=None) - parser.add_argument("--clf-bs", dest="clf_bs", type=int, default=32) - parser.add_argument("--cutpoint", dest="cutpoint", type=float, default=.5) - parser.add_argument("--amp-factor", dest="amp_factor", type=float, default=1.) - parser.add_argument("--return-sample-weight", dest="return_sample_weight", action="store_true") - parser.add_argument("--no-return-sample-weight", dest="return_sample_weight", action="store_false") - parser.set_defaults(return_sample_weight=True) - parser.add_argument("--auto-batch-balance", dest="auto_batch_balance", action="store_true") - parser.add_argument("--no-auto-batch-balance", dest="auto_batch_balance", action="store_false") - parser.set_defaults(auto_batch_balance=True) - parser.add_argument("--patches-per-epoch", "-ppe", dest="patches_per_epoch", type=int, default=12800) - parser.add_argument("--nb-epoch", "-ne", dest="nb_epoch", type=int, default=20) - parser.add_argument("--nvp-ratio", dest="neg_vs_pos_ratio", type=float, default=None) - parser.add_argument("--no-nvp-ratio", dest="neg_vs_pos_ratio", action="store_const", const=None) - parser.add_argument("--allneg-skip", dest="all_neg_skip", type=float, default=0.) - parser.add_argument("--nopos-skip", dest="no_pos_skip", type=float, default=0.) - parser.add_argument("--balance-classes", dest="balance_classes", type=float, default=0.) 
- parser.add_argument("--nb-init-filter", "-nif", dest="nb_init_filter", type=int, default=32) - parser.add_argument("--init-filter-size", "-ifs", dest="init_filter_size", type=int, default=5) - parser.add_argument("--init-conv-stride", "-ics", dest="init_conv_stride", type=int, default=2) - parser.add_argument("--max-pooling-size", "-mps", dest="pool_size", type=int, default=2) - parser.add_argument("--max-pooling-stride", "-mpr", dest="pool_stride", type=int, default=2) - parser.add_argument("--weight-decay", "-wd", dest="weight_decay", type=float, default=.0001) - parser.add_argument("--alpha", dest="alpha", type=float, default=.0001) - parser.add_argument("--l1-ratio", dest="l1_ratio", type=float, default=.0) - parser.add_argument("--inp-dropout", "-id", dest="inp_dropout", type=float, default=.0) - parser.add_argument("--hidden-dropout", "-hd", dest="hidden_dropout", type=float, default=.0) - parser.add_argument("--init-learningrate", "-ilr", dest="init_lr", type=float, default=.01) - parser.add_argument("--test-size", "-ts", dest="test_size", type=float, default=.2) - parser.add_argument("--val-size", "-vs", dest="val_size", type=float, default=.0) - parser.add_argument("--lr-patience", "-lrp", dest="lr_patience", type=int, default=3) - parser.add_argument("--es-patience", "-esp", dest="es_patience", type=int, default=10) - parser.add_argument("--resume-from", "-rf", dest="resume_from", type=str, default=None) - parser.add_argument("--no-resume-from", "-nrf", dest="resume_from", action="store_const", const=None) - parser.add_argument("--net", dest="net", type=str, default="resnet50") - parser.add_argument("--loadval-ram", dest="load_val_ram", action="store_true") - parser.add_argument("--no-loadval-ram", dest="load_val_ram", action="store_false") - parser.set_defaults(load_val_ram=False) - parser.add_argument("--loadtrain-ram", dest="load_train_ram", action="store_true") - parser.add_argument("--no-loadtrain-ram", dest="load_train_ram", action="store_false") - parser.set_defaults(load_train_ram=False) - parser.add_argument("--exam-tsv", "-et", dest="exam_tsv", type=str, - default="./metadata/exams_metadata.tsv") - parser.add_argument("--no-exam-tsv", dest="exam_tsv", action="store_const", const=None) - parser.add_argument("--img-tsv", "-it", dest="img_tsv", type=str, - default="./metadata/images_crosswalk.tsv") - parser.add_argument("--best-model", "-bm", dest="best_model", type=str, - default="./modelState/dm_candidROI_best_model.h5") - parser.add_argument("--final-model", "-fm", dest="final_model", type=str, - default="NOSAVE") - parser.add_argument("--pred-trainval", dest="pred_trainval", action="store_true") - parser.add_argument("--no-pred-trainval", dest="pred_trainval", action="store_false") - parser.set_defaults(pred_trainval=False) - parser.add_argument("--pred-out", dest="pred_out", type=str, default="dl_pred_out.pkl") - - args = parser.parse_args() - run_opts = dict( - img_extension=args.img_extension, - img_height=args.img_height, - img_scale=args.img_scale, - do_featurewise_norm=args.do_featurewise_norm, - norm_fit_size=args.norm_fit_size, - img_per_batch=args.img_per_batch, - pred_img_per_batch=args.pred_img_per_batch, - roi_per_img=args.roi_per_img, - pred_roi_per_img=args.pred_roi_per_img, - roi_size=tuple(args.roi_size), - one_patch_mode=args.one_patch_mode, - low_int_threshold=args.low_int_threshold, - blob_min_area=args.blob_min_area, - blob_min_int=args.blob_min_int, - blob_max_int=args.blob_max_int, - blob_th_step=args.blob_th_step, - 
data_augmentation=args.data_augmentation, - roi_state=args.roi_state, - clf_bs=args.clf_bs, - cutpoint=args.cutpoint, - amp_factor=args.amp_factor, - return_sample_weight=args.return_sample_weight, - auto_batch_balance=args.auto_batch_balance, - patches_per_epoch=args.patches_per_epoch, - nb_epoch=args.nb_epoch, - neg_vs_pos_ratio=args.neg_vs_pos_ratio, - all_neg_skip=args.all_neg_skip, - nb_init_filter=args.nb_init_filter, - init_filter_size=args.init_filter_size, - init_conv_stride=args.init_conv_stride, - pool_size=args.pool_size, - pool_stride=args.pool_stride, - weight_decay=args.weight_decay, - alpha=args.alpha, - l1_ratio=args.l1_ratio, - inp_dropout=args.inp_dropout, - hidden_dropout=args.hidden_dropout, - init_lr=args.init_lr, - val_size=args.val_size if args.val_size < 1 else int(args.val_size), - test_size=args.test_size if args.test_size < 1 else int(args.test_size), - lr_patience=args.lr_patience, - es_patience=args.es_patience, - resume_from=args.resume_from, - net=args.net, - load_val_ram=args.load_val_ram, - load_train_ram=args.load_train_ram, - no_pos_skip=args.no_pos_skip, - balance_classes=args.balance_classes, - exam_tsv=args.exam_tsv, - img_tsv=args.img_tsv, - best_model=args.best_model, - final_model=args.final_model, - pred_trainval=args.pred_trainval, - pred_out=args.pred_out - ) - print "\n>>> Model training options: <<<\n", run_opts, "\n" - run(args.img_folder, **run_opts) - - - diff --git a/training/dm_enet_train.py b/training/dm_enet_train.py deleted file mode 100644 index 4777910..0000000 --- a/training/dm_enet_train.py +++ /dev/null @@ -1,304 +0,0 @@ -import os, argparse -import pickle -import time -import numpy as np -from numpy.random import RandomState -from sklearn.linear_model import SGDClassifier -from sklearn.metrics import ( - roc_auc_score, - precision_score, - recall_score, - log_loss -) -from sklearn.model_selection import train_test_split -# from sklearn.exceptions import UndefinedMetricWarning -from keras.models import load_model, Model -from meta import DMMetaManager -from dm_image import DMImageDataGenerator -from dm_multi_gpu import make_parallel -from dm_keras_ext import DMMetrics - -import warnings -import exceptions -warnings.filterwarnings('ignore', category=exceptions.UserWarning) - - -def dlrepr_generator(repr_model, datgen, dat_size): - '''Obtain DL representations (and labels) from a generator - ''' - # Setup test data in RAM. - X_list = [] - y_list = [] - samples_seen = 0 - while samples_seen < dat_size: - ret = next(datgen) - if isinstance(ret, tuple): - X, y = ret - y_list.append(y) - else: - X = ret - X_repr = repr_model.predict_on_batch(X) - X_list.append(X_repr) - samples_seen += len(X_repr) - X_dat = np.concatenate(X_list) - if len(y_list) > 0: - y_dat = np.concatenate(y_list) - return X_dat, y_dat - else: - return X_dat - - -def run(img_folder, img_extension='png', img_size=[288, 224], multi_view=False, - do_featurewise_norm=True, featurewise_mean=7772., featurewise_std=12187., - batch_size=16, samples_per_epoch=160, nb_epoch=20, val_size=.2, - balance_classes=0., all_neg_skip=False, pos_cls_weight=1.0, - alpha=1., l1_ratio=.5, init_lr=.01, lr_patience=2, es_patience=4, - exam_tsv='./metadata/exams_metadata.tsv', - img_tsv='./metadata/images_crosswalk.tsv', - dl_state='./modelState/resnet50_288_best_model.h5', - best_model='./modelState/enet_288_best_model.h5', - final_model="NOSAVE"): - - # Read some env variables. 
- random_seed = int(os.getenv('RANDOM_SEED', 12345))
- nb_worker = int(os.getenv('NUM_CPU_CORES', 4))
- gpu_count = int(os.getenv('NUM_GPU_DEVICES', 1))
-
- # Setup training and validation data.
- meta_man = DMMetaManager(exam_tsv=exam_tsv, img_tsv=img_tsv,
- img_folder=img_folder, img_extension=img_extension)
-
- if multi_view:
- exam_list = meta_man.get_flatten_exam_list()
- exam_train, exam_val = train_test_split(
- exam_list, test_size=val_size, random_state=random_seed,
- stratify=meta_man.exam_labs(exam_list))
- val_size_ = len(exam_val)*2 # L and R.
- else:
- img_list, lab_list = meta_man.get_flatten_img_list()
- img_train, img_val, lab_train, lab_val = train_test_split(
- img_list, lab_list, test_size=val_size, random_state=random_seed,
- stratify=lab_list)
- val_size_ = len(img_val)
-
- img_gen = DMImageDataGenerator(
- horizontal_flip=True,
- vertical_flip=True)
- if do_featurewise_norm:
- img_gen.featurewise_center = True
- img_gen.featurewise_std_normalization = True
- img_gen.mean = featurewise_mean
- img_gen.std = featurewise_std
- else:
- img_gen.samplewise_center = True
- img_gen.samplewise_std_normalization = True
-
- if multi_view:
- train_generator = img_gen.flow_from_exam_list(
- exam_train, target_size=(img_size[0], img_size[1]),
- batch_size=batch_size, balance_classes=balance_classes,
- all_neg_skip=all_neg_skip, shuffle=True, seed=random_seed,
- class_mode='binary')
- val_generator = img_gen.flow_from_exam_list(
- exam_val, target_size=(img_size[0], img_size[1]),
- batch_size=batch_size, validation_mode=True,
- class_mode='binary')
- else:
- train_generator = img_gen.flow_from_img_list(
- img_train, lab_train, target_size=(img_size[0], img_size[1]),
- batch_size=batch_size, balance_classes=balance_classes,
- all_neg_skip=all_neg_skip, shuffle=True, seed=random_seed,
- class_mode='binary')
- val_generator = img_gen.flow_from_img_list(
- img_val, lab_val, target_size=(img_size[0], img_size[1]),
- batch_size=batch_size, validation_mode=True,
- class_mode='binary')
-
-
- # Deep learning model.
- dl_model = load_model(
- dl_state,
- custom_objects={'sensitivity': DMMetrics.sensitivity,
- 'specificity': DMMetrics.specificity})
- # Dummy compilation to turn off the "uncompiled" error when the model is
- # run on multiple GPUs.
- # dl_model.compile(optimizer='sgd', loss='binary_crossentropy')
- reprlayer_model = Model(
- input=dl_model.input, output=dl_model.get_layer(index=-2).output)
- if gpu_count > 1:
- reprlayer_model = make_parallel(reprlayer_model, gpu_count)
-
-
- # Setup test data in RAM.
- X_test, y_test = dlrepr_generator(
- reprlayer_model, val_generator, val_size_)
- # import pdb; pdb.set_trace()
-
-
- # Evaluate the DL model on the test data.
- val_generator.reset()
- dl_test_pred = dl_model.predict_generator(
- val_generator, val_samples=val_size_, nb_worker=1,
- pickle_safe=False)
- # Setting nb_worker to >1 can cause:
- # either inconsistent results when pickle_safe is False,
- # or a broadcasting error when pickle_safe is True.
- # This seems to be a Keras bug!!
- # Further note: the broadcasting error may only happen when val_size_
- # is not divisible by batch_size.
- try:
- dl_auc = roc_auc_score(y_test, dl_test_pred)
- dl_loss = log_loss(y_test, dl_test_pred)
- except ValueError:
- dl_auc = 0.
- dl_loss = np.inf
- print "\nAUROC by the DL model: %.4f, loss: %.4f" % (dl_auc, dl_loss)
- # import pdb; pdb.set_trace()
-
- # Elastic net training.
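- # The loop below fits an elastic-net logistic regression online with
- # partial_fit on mini-batches of DL representations. A self-contained toy
- # version of the same pattern, on made-up data:
- # import numpy as np
- # from sklearn.linear_model import SGDClassifier
- # clf = SGDClassifier(loss='log', penalty='elasticnet', alpha=1.,
- #                     l1_ratio=.5, learning_rate='constant', eta0=.01)
- # for _ in xrange(10):  # "epochs"
- #     X = np.random.randn(32, 256)  # a mini-batch of representations
- #     y = np.random.randint(0, 2, 32)
- #     clf.partial_fit(X, y, classes=np.array([0, 1]))
- # prob = clf.predict_proba(X)[:, 1]
- # Note the full class list must be given on the first partial_fit call.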
- target_classes = np.array([0, 1])
- sgd_clf = SGDClassifier(
- loss='log', penalty='elasticnet', alpha=alpha, l1_ratio=l1_ratio,
- verbose=0, n_jobs=nb_worker, learning_rate='constant', eta0=init_lr,
- random_state=random_seed, class_weight={0: 1.0, 1: pos_cls_weight})
- curr_lr = init_lr
- best_epoch = 0
- best_auc = 0.
- min_loss = np.inf
- min_loss_epoch = 0
- for epoch in xrange(nb_epoch):
- samples_seen = 0
- X_list = []
- y_list = []
- epoch_start = time.time()
- while samples_seen < samples_per_epoch:
- X, y = next(train_generator)
- X_repr = reprlayer_model.predict_on_batch(X)
- sgd_clf.partial_fit(X_repr, y, classes=target_classes)
- samples_seen += len(y)
- X_list.append(X_repr)
- y_list.append(y)
- # The training X, y are expected to change for each epoch due to
- # image random sampling and class balancing.
- X_train_epo = np.concatenate(X_list)
- y_train_epo = np.concatenate(y_list)
- # End of epoch summary.
- pred_prob = sgd_clf.predict_proba(X_test)[:, 1]
- train_prob = sgd_clf.predict_proba(X_train_epo)[:, 1]
- try:
- auc = roc_auc_score(y_test, pred_prob)
- crossentropy_loss = log_loss(y_test, pred_prob)
- except ValueError:
- auc = 0.
- crossentropy_loss = np.inf
- try:
- train_loss = log_loss(y_train_epo, train_prob)
- except ValueError:
- train_loss = np.inf
- wei_sparseness = np.mean(sgd_clf.coef_ == 0)
- epoch_span = time.time() - epoch_start
- print ("%ds - Epoch=%d, auc=%.4f, train_loss=%.4f, test_loss=%.4f, "
- "weight sparsity=%.4f") % \
- (epoch_span, epoch + 1, auc, train_loss, crossentropy_loss,
- wei_sparseness)
- # Model checkpoint, reducing learning rate and early stopping.
- if auc > best_auc:
- best_epoch = epoch + 1
- best_auc = auc
- if best_model != "NOSAVE":
- with open(best_model, 'w') as best_state:
- pickle.dump(sgd_clf, best_state)
- if crossentropy_loss < min_loss:
- min_loss = crossentropy_loss
- min_loss_epoch = epoch + 1
- else:
- if epoch + 1 - min_loss_epoch >= es_patience:
- print 'Early stopping criterion has been reached. Stopping training.'
- break
- if epoch + 1 - min_loss_epoch >= lr_patience:
- curr_lr *= .1
- sgd_clf.set_params(eta0=curr_lr)
- print "Reducing learning rate to: %s" % (curr_lr)
- # End of training summary
- print ">>> Found best AUROC: %.4f at epoch: %d, saved to: %s <<<" % \
- (best_auc, best_epoch, best_model)
- print ">>> Found best val loss: %.4f at epoch: %d. <<<" % \
- (min_loss, min_loss_epoch)
- #### Save elastic net model!! ####
- if final_model != "NOSAVE":
- with open(final_model, 'w') as final_state:
- pickle.dump(sgd_clf, final_state)
-
-
-if __name__ == '__main__':
-
- parser = argparse.ArgumentParser(description="DM ElasticNet training")
- parser.add_argument("img_folder", type=str)
- parser.add_argument("--img-extension", "-ext", dest="img_extension",
- type=str, default="png")
- parser.add_argument("--img-size", "-is", dest="img_size", nargs=2, type=int,
- default=[288, 224])
- parser.add_argument("--multi-view", dest="multi_view", action="store_true")
- parser.add_argument("--no-multi-view", dest="multi_view", action="store_false")
- parser.set_defaults(multi_view=False)
- parser.add_argument("--featurewise-norm", dest="do_featurewise_norm", action="store_true")
- parser.add_argument("--no-featurewise-norm", dest="do_featurewise_norm", action="store_false")
- parser.set_defaults(do_featurewise_norm=True)
- parser.add_argument("--featurewise-mean", "-feam", dest="featurewise_mean",
- type=float, default=7772.)
- parser.add_argument("--featurewise-std", "-feas", dest="featurewise_std", - type=float, default=12187.) - parser.add_argument("--batch-size", "-bs", dest="batch_size", type=int, default=16) - parser.add_argument("--samples-per-epoch", "-spe", dest="samples_per_epoch", - type=int, default=160) - parser.add_argument("--nb-epoch", "-ne", dest="nb_epoch", type=int, default=20) - parser.add_argument("--balance-classes", "-bc", dest="balance_classes", type=float, default=.0) - parser.add_argument("--allneg-skip", dest="all_neg_skip", type=float, default=0.) - parser.add_argument("--pos-class-weight", "-pcw", dest="pos_cls_weight", type=float, default=1.0) - parser.add_argument("--alpha", dest="alpha", type=float, default=1.) - parser.add_argument("--l1-ratio", dest="l1_ratio", type=float, default=.5) - parser.add_argument("--init-learningrate", "-ilr", dest="init_lr", type=float, default=.01) - parser.add_argument("--lr-patience", "-lrp", dest="lr_patience", type=int, default=2) - parser.add_argument("--es-patience", "-esp", dest="es_patience", type=int, default=4) - parser.add_argument("--power-t", "-pt", dest="power_t", type=float, default=.25) - parser.add_argument("--val-size", "-vs", dest="val_size", type=float, default=.2) - # parser.add_argument("--resume-from", "-rf", dest="resume_from", type=str, default=None) - parser.add_argument("--exam-tsv", "-et", dest="exam_tsv", type=str, - default="./metadata/exams_metadata.tsv") - parser.add_argument("--no-exam-tsv", dest="exam_tsv", action="store_const", const=None) - parser.add_argument("--img-tsv", "-it", dest="img_tsv", type=str, - default="./metadata/images_crosswalk.tsv") - parser.add_argument("--dl-state", "-ds", dest="dl_state", type=str, default="none") - parser.add_argument("--best-model", "-bm", dest="best_model", type=str, - default="./modelState/enet_288_best_model.h5") - parser.add_argument("--final-model", "-fm", dest="final_model", type=str, - default="NOSAVE") - - args = parser.parse_args() - run_opts = dict( - img_extension=args.img_extension, - img_size=args.img_size, - multi_view=args.multi_view, - do_featurewise_norm=args.do_featurewise_norm, - featurewise_mean=args.featurewise_mean, - featurewise_std=args.featurewise_std, - batch_size=args.batch_size, - samples_per_epoch=args.samples_per_epoch, - nb_epoch=args.nb_epoch, - balance_classes=args.balance_classes, - all_neg_skip=args.all_neg_skip, - pos_cls_weight=args.pos_cls_weight, - alpha=args.alpha, - l1_ratio=args.l1_ratio, - init_lr=args.init_lr, - lr_patience=args.lr_patience, - es_patience=args.es_patience, - val_size=args.val_size if args.val_size < 1 else int(args.val_size), - # resume_from=args.resume_from, - exam_tsv=args.exam_tsv, - img_tsv=args.img_tsv, - dl_state=args.dl_state, - best_model=args.best_model, - final_model=args.final_model - ) - print "\n>>> Model training options: <<<\n", run_opts, "\n" - run(args.img_folder, **run_opts) diff --git a/training/dm_heatmap_score.py b/training/dm_heatmap_score.py deleted file mode 100644 index 655feb9..0000000 --- a/training/dm_heatmap_score.py +++ /dev/null @@ -1,224 +0,0 @@ -import argparse -import os, sys -import pickle -import numpy as np -from numpy.random import RandomState -# from sklearn.model_selection import train_test_split -from keras.models import load_model -from meta import DMMetaManager -from dm_image import ( - add_img_margins, - read_resize_img, - sweep_img_patches, - get_prob_heatmap) -from dm_keras_ext import DMMetrics as dmm -from dm_multi_gpu import make_parallel -from dm_preprocess import 
DMImagePreprocessor as prep
-import warnings
-import exceptions
-warnings.filterwarnings('ignore', category=exceptions.UserWarning)
-
-
-def run(img_folder, dl_state, img_extension='dcm',
- img_height=1024, img_scale=255., equalize_hist=False,
- featurewise_center=False, featurewise_mean=91.6,
- neg_vs_pos_ratio=1.,
- net='vgg19', batch_size=128, patch_size=256, stride=8,
- exam_tsv='./metadata/exams_metadata.tsv',
- img_tsv='./metadata/images_crosswalk.tsv',
- out='./modelState/prob_heatmap.pkl',
- predicted_subj_file=None, add_subjs=500):
- '''Sweep mammograms with a trained DL model to create probability heatmaps
- '''
- # Read some env variables.
- random_seed = int(os.getenv('RANDOM_SEED', 12345))
- rng = RandomState(random_seed) # an RNG used across the board.
- gpu_count = int(os.getenv('NUM_GPU_DEVICES', 1))
-
- # Load and split image and label lists.
- meta_man = DMMetaManager(exam_tsv=exam_tsv,
- img_tsv=img_tsv,
- img_folder=img_folder,
- img_extension=img_extension)
- subj_list, subj_labs = meta_man.get_subj_labs()
- subj_labs = np.array(subj_labs)
- print "Found %d subjects" % (len(subj_list))
- print "cancer patients=%d, normal patients=%d" \
- % ((subj_labs==1).sum(), (subj_labs==0).sum())
- if predicted_subj_file is not None:
- predicted_subjs = np.load(predicted_subj_file)
- subj_list = np.setdiff1d(subj_list, predicted_subjs)
- subj_list = subj_list[:add_subjs]
- print "Will predict %d additional subjects" % (len(subj_list))
- elif neg_vs_pos_ratio is not None:
- subj_list, subj_labs = DMMetaManager.subset_subj_list(
- subj_list, subj_labs, neg_vs_pos_ratio, random_seed)
- subj_labs = np.array(subj_labs)
- print "After subsetting, there are %d subjects" % (len(subj_list))
- print "cancer patients=%d, normal patients=%d" \
- % ((subj_labs==1).sum(), (subj_labs==0).sum())
-
- # Get exam lists.
- # >>>> Debug <<<< #
- # subj_list = subj_list[:2]
- # >>>> Debug <<<< #
- print "Get flattened exam list"
- exam_list = meta_man.get_flatten_exam_list(subj_list, cc_mlo_only=True)
- exam_labs = meta_man.exam_labs(exam_list)
- exam_labs = np.array(exam_labs)
- print "positive exams=%d, negative exams=%d" \
- % ((exam_labs==1).sum(), (exam_labs==0).sum())
- sys.stdout.flush()
-
- # Load DL model.
- print "Load patch classifier:", dl_state; sys.stdout.flush()
- dl_model = load_model(
- dl_state,
- custom_objects={
- 'sensitivity': dmm.sensitivity,
- 'specificity': dmm.specificity
- }
- )
-
- if gpu_count > 1:
- print "Make the model parallel on %d GPUs" % (gpu_count)
- sys.stdout.flush()
- dl_model, _ = make_parallel(dl_model, gpu_count)
- parallelized = True
- else:
- parallelized = False
-
- # Load preprocess function.
- if featurewise_center:
- preprocess_input = None
- else:
- print "Load preprocess function for net:", net
- if net == 'resnet50':
- from keras.applications.resnet50 import preprocess_input
- elif net == 'vgg16':
- from keras.applications.vgg16 import preprocess_input
- elif net == 'vgg19':
- from keras.applications.vgg19 import preprocess_input
- elif net == 'xception':
- from keras.applications.xception import preprocess_input
- elif net == 'inception':
- from keras.applications.inception_v3 import preprocess_input
- else:
- raise Exception("Pretrained model is not available: " + net)
-
- # Sweep the whole images and classify patches.
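- # get_prob_heatmap (dm_image) is not shown in this diff. Conceptually it
- # slides the patch classifier over an image and reshapes the patch scores
- # into a grid. A rough sketch, assuming a channels-last backend and the
- # sweep_img_patches helper imported above:
- # def prob_heatmap_sketch(img, patch_size, stride, model, batch_size):
- #     patches, nb_row, nb_col = sweep_img_patches(img, patch_size, stride)
- #     X = np.stack([patches]*3, axis=-1).astype('float64')  # grey -> RGB
- #     pred = model.predict(X, batch_size=batch_size)
- #     return pred.reshape((nb_row, nb_col, pred.shape[1]))
- # The real function also handles image reading, scaling and normalization.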
- print "Generate prob heatmaps for exam list" - sys.stdout.flush() - heatmap_dat_list = [] - for i,e in enumerate(exam_list): - dat = (e[0], e[1], - {'L':{'cancer':e[2]['L']['cancer']}, - 'R':{'cancer':e[2]['R']['cancer']}}) - dat[2]['L']['CC'] = get_prob_heatmap( - e[2]['L']['CC'], img_height, img_scale, patch_size, stride, - dl_model, batch_size, featurewise_center=featurewise_center, - featurewise_mean=featurewise_mean, preprocess=preprocess_input, - parallelized=parallelized, equalize_hist=equalize_hist) - dat[2]['L']['MLO'] = get_prob_heatmap( - e[2]['L']['MLO'], img_height, img_scale, patch_size, stride, - dl_model, batch_size, featurewise_center=featurewise_center, - featurewise_mean=featurewise_mean, preprocess=preprocess_input, - parallelized=parallelized, equalize_hist=equalize_hist) - dat[2]['R']['CC'] = get_prob_heatmap( - e[2]['R']['CC'], img_height, img_scale, patch_size, stride, - dl_model, batch_size, featurewise_center=featurewise_center, - featurewise_mean=featurewise_mean, preprocess=preprocess_input, - parallelized=parallelized, equalize_hist=equalize_hist) - dat[2]['R']['MLO'] = get_prob_heatmap( - e[2]['R']['MLO'], img_height, img_scale, patch_size, stride, - dl_model, batch_size, featurewise_center=featurewise_center, - featurewise_mean=featurewise_mean, preprocess=preprocess_input, - parallelized=parallelized, equalize_hist=equalize_hist) - heatmap_dat_list.append(dat) - print "processed %d/%d exams" % (i+1, len(exam_list)) - sys.stdout.flush() - ### DEBUG ### - # if i >= 1: - # break - ### DEBUG ### - print "Done." - - # Save the result. - print "Saving result to external files.", - sys.stdout.flush() - pickle.dump(heatmap_dat_list, open(out, 'w')) - print "Done." - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description="DM prob heatmap scoring") - parser.add_argument("img_folder", type=str) - parser.add_argument("dl_state", type=str) - parser.add_argument("--img-extension", "-ext", dest="img_extension", type=str, default="dcm") - parser.add_argument("--img-height", "-ih", dest="img_height", type=int, default=1024) - parser.add_argument("--img-scale", "-ic", dest="img_scale", type=float, default=255.) - parser.add_argument("--equalize-hist", dest="equalize_hist", action="store_true") - parser.add_argument("--no-equalize-hist", dest="equalize_hist", action="store_false") - parser.set_defaults(equalize_hist=False) - parser.add_argument("--featurewise-center", dest="featurewise_center", action="store_true") - parser.add_argument("--no-featurewise-center", dest="featurewise_center", action="store_false") - parser.set_defaults(featurewise_center=False) - parser.add_argument("--featurewise-mean", dest="featurewise_mean", type=float, default=91.6) - parser.add_argument("--neg-vs-pos-ratio", dest="neg_vs_pos_ratio", type=float, default=10.) 
- parser.add_argument("--no-neg-vs-pos-ratio", dest="neg_vs_pos_ratio", - action="store_const", const=None) - parser.add_argument("--net", dest="net", type=str, default="vgg19") - parser.add_argument("--batch-size", dest="batch_size", type=int, default=128) - parser.add_argument("--patch-size", dest="patch_size", type=int, default=256) - parser.add_argument("--stride", dest="stride", type=int, default=8) - parser.add_argument("--exam-tsv", "-et", dest="exam_tsv", type=str, - default="./metadata/exams_metadata.tsv") - parser.add_argument("--no-exam-tsv", dest="exam_tsv", action="store_const", const=None) - parser.add_argument("--img-tsv", "-it", dest="img_tsv", type=str, - default="./metadata/images_crosswalk.tsv") - parser.add_argument("--out", dest="out", type=str, - default="./modelState/prob_heatmap.pkl") - parser.add_argument("--predicted-subj-file", dest="predicted_subj_file", type=str, default=None) - parser.add_argument("--no-predicted-subj-file", dest="predicted_subj_file", action="store_const", const=None) - parser.add_argument("--add-subjs", dest="add_subjs", type=int, default=500) - - args = parser.parse_args() - run_opts = dict( - img_extension=args.img_extension, - img_height=args.img_height, - img_scale=args.img_scale, - equalize_hist=args.equalize_hist, - featurewise_center=args.featurewise_center, - featurewise_mean=args.featurewise_mean, - neg_vs_pos_ratio=args.neg_vs_pos_ratio, - net=args.net, - batch_size=args.batch_size, - patch_size=args.patch_size, - stride=args.stride, - exam_tsv=args.exam_tsv, - img_tsv=args.img_tsv, - out=args.out, - predicted_subj_file=args.predicted_subj_file, - add_subjs=args.add_subjs - ) - print "\n" - print "img_folder=%s" % (args.img_folder) - print "dl_state=%s" % (args.dl_state) - print "\n>>> Model training options: <<<\n", run_opts, "\n" - run(args.img_folder, args.dl_state, **run_opts) - - - - - - - - - - - - - - - diff --git a/training/dm_patchClf_finetune.py b/training/dm_patchClf_finetune.py deleted file mode 100644 index ac4729c..0000000 --- a/training/dm_patchClf_finetune.py +++ /dev/null @@ -1,503 +0,0 @@ -import argparse -import os, sys -import pickle -import numpy as np -from numpy.random import RandomState -from scipy.misc import toimage -from sklearn.model_selection import train_test_split -from keras.models import load_model -from meta import DMMetaManager -from dm_image import add_img_margins, read_resize_img, sweep_img_patches -from dm_image import DMImageDataGenerator -from dm_keras_ext import ( - DMMetrics as dmm, - get_dl_model, create_optimizer, - load_dat_ram, - do_3stage_training -) -from dm_multi_gpu import make_parallel -from dm_preprocess import DMImagePreprocessor as prep - -import warnings -import exceptions -warnings.filterwarnings('ignore', category=exceptions.UserWarning) -import keras.backend as K -dim_ordering = K.image_dim_ordering() - - -def score_write_patches(img_list, lab_list, target_height, target_scale, - patch_size, stride, model, batch_size, - neg_out, pos_out, bkg_out, - preprocess=None, equalize_hist=False, - featurewise_center=False, featurewise_mean=91.6, - roi_cutoff=.9, bkg_cutoff=[.5, 1.], - sample_bkg=True, img_ext='png', random_seed=12345, - parallelized=False): - '''Score image patches and write them to an external directory - ''' - def write_patches(img_fn, patch_dat, idx, out_dir, img_ext='png'): - basename = os.path.basename(img_fn) - fn_no_ext = os.path.splitext(basename)[0] - if img_ext == 'png': - max_val = 65535. - else: - max_val = 255. 
- for i in idx: - patch = patch_dat[i] - patch_max = patch.max() if patch.max() != 0 else max_val - patch *= max_val/patch_max - patch = patch.astype('int32') - mode = 'I' if img_ext == 'png' else None - patch_img = toimage(patch, high=patch.max(), low=patch.min(), - mode=mode) - filename = fn_no_ext + "_%06d" % (i) + '.' + img_ext - fullname = os.path.join(out_dir, filename) - patch_img.save(fullname) - - rng = RandomState(random_seed) - nb_roi = 0 - nb_bkg = 0 - for img_fn, img_lab in zip(img_list, lab_list): - img = read_resize_img(img_fn, target_height=target_height) - img,_ = prep.segment_breast(img) - img = add_img_margins(img, patch_size/2) - patch_dat, nb_row, nb_col = sweep_img_patches( - img, patch_size, stride, target_scale=target_scale, - equalize_hist=equalize_hist) - org_patch_dat = patch_dat.copy() - if parallelized and len(patch_dat) % 2 == 1: - last_patch = patch_dat[-1:,:,:] - patch_dat = np.append(patch_dat, last_patch, axis=0) - appended = True - else: - appended = False - if dim_ordering == 'th': - patch_X = np.zeros((patch_dat.shape[0], 3, - patch_dat.shape[1], - patch_dat.shape[2]), - dtype='float64') - patch_X[:,0,:,:] = patch_dat - patch_X[:,1,:,:] = patch_dat - patch_X[:,2,:,:] = patch_dat - else: - patch_X = np.zeros((patch_dat.shape[0], - patch_dat.shape[1], - patch_dat.shape[2], 3), - dtype='float64') - patch_X[:,:,:,0] = patch_dat - patch_X[:,:,:,1] = patch_dat - patch_X[:,:,:,2] = patch_dat - if featurewise_center: - patch_X -= featurewise_mean - elif preprocess is not None: - patch_X = preprocess(patch_X) - pred = model.predict(patch_X, batch_size=batch_size) - # import pdb; pdb.set_trace() - if appended: - pred = pred[:-1] - roi_idx = np.where(pred[:,0] < 1 - roi_cutoff)[0] - bkg_idx = np.where( - np.logical_and(pred[:,0] > bkg_cutoff[0], - pred[:,0] <= bkg_cutoff[1]))[0] - if sample_bkg and len(bkg_idx) > len(roi_idx): - bkg_idx = rng.choice(bkg_idx, len(roi_idx), replace=False) - roi_out = pos_out if img_lab==1 else neg_out - write_patches(img_fn, org_patch_dat, roi_idx, roi_out, img_ext) - write_patches(img_fn, org_patch_dat, bkg_idx, bkg_out, img_ext) - nb_roi += len(roi_idx) - nb_bkg += len(bkg_idx) - return nb_roi, nb_bkg - - -def run(img_folder, dl_state, best_model, img_extension='dcm', - img_height=1024, img_scale=255., equalize_hist=False, - featurewise_center=False, featurewise_mean=91.6, - neg_vs_pos_ratio=1., val_size=.1, test_size=.15, - net='vgg19', batch_size=128, train_bs_multiplier=.5, - patch_size=256, stride=8, roi_cutoff=.9, bkg_cutoff=[.5, 1.], sample_bkg=True, - train_out='./scratch/train', val_out='./scratch/val', - test_out='./scratch/test', out_img_ext='png', - neg_name='benign', pos_name='malignant', bkg_name='background', - augmentation=True, load_train_ram=False, load_val_ram=False, - top_layer_nb=None, nb_epoch=10, top_layer_epochs=0, all_layer_epochs=0, - optim='sgd', init_lr=.01, - top_layer_multiplier=.01, all_layer_multiplier=.0001, - es_patience=5, lr_patience=2, weight_decay2=.01, bias_multiplier=.1, - hidden_dropout2=.0, - exam_tsv='./metadata/exams_metadata.tsv', - img_tsv='./metadata/images_crosswalk.tsv', - out='./modelState/subj_lists.pkl'): - '''Finetune a trained DL model on a different dataset - ''' - # Read some env variables. - random_seed = int(os.getenv('RANDOM_SEED', 12345)) - rng = RandomState(random_seed) # an rng used across board. - nb_worker = int(os.getenv('NUM_CPU_CORES', 4)) - gpu_count = int(os.getenv('NUM_GPU_DEVICES', 1)) - - # Load and split image and label lists. 
- meta_man = DMMetaManager(exam_tsv=exam_tsv,
- img_tsv=img_tsv,
- img_folder=img_folder,
- img_extension=img_extension)
- subj_list, subj_labs = meta_man.get_subj_labs()
- subj_labs = np.array(subj_labs)
- print "Found %d subjects" % (len(subj_list))
- print "cancer patients=%d, normal patients=%d" \
- % ((subj_labs==1).sum(), (subj_labs==0).sum())
- if neg_vs_pos_ratio is not None:
- subj_list, subj_labs = DMMetaManager.subset_subj_list(
- subj_list, subj_labs, neg_vs_pos_ratio, random_seed)
- subj_labs = np.array(subj_labs)
- print "After subsetting, there are %d subjects" % (len(subj_list))
- print "cancer patients=%d, normal patients=%d" \
- % ((subj_labs==1).sum(), (subj_labs==0).sum())
- subj_train, subj_test, labs_train, labs_test = train_test_split(
- subj_list, subj_labs, test_size=test_size, stratify=subj_labs,
- random_state=random_seed)
- subj_train, subj_val, labs_train, labs_val = train_test_split(
- subj_train, labs_train, test_size=val_size, stratify=labs_train,
- random_state=random_seed)
-
- # Get image lists.
- # >>>> Debug <<<< #
- # subj_train = subj_train[:5]
- # subj_val = subj_val[:5]
- # subj_test = subj_test[:5]
- # >>>> Debug <<<< #
- print "Get flattened image lists"
- img_train, ilab_train = meta_man.get_flatten_img_list(subj_train)
- img_val, ilab_val = meta_man.get_flatten_img_list(subj_val)
- img_test, ilab_test = meta_man.get_flatten_img_list(subj_test)
- ilab_train = np.array(ilab_train)
- ilab_val = np.array(ilab_val)
- ilab_test = np.array(ilab_test)
- print "On train set, positive img=%d, negative img=%d" \
- % ((ilab_train==1).sum(), (ilab_train==0).sum())
- print "On val set, positive img=%d, negative img=%d" \
- % ((ilab_val==1).sum(), (ilab_val==0).sum())
- print "On test set, positive img=%d, negative img=%d" \
- % ((ilab_test==1).sum(), (ilab_test==0).sum())
- sys.stdout.flush()
-
- # Save the subj lists.
- print "Saving subject lists to external files.",
- sys.stdout.flush()
- pickle.dump((subj_train, subj_val, subj_test), open(out, 'w'))
- print "Done."
-
- # Load DL model, preprocess function.
- print "Load patch classifier:", dl_state; sys.stdout.flush()
- dl_model, preprocess_input, top_layer_nb = get_dl_model(
- net, use_pretrained=True, resume_from=dl_state,
- top_layer_nb=top_layer_nb)
- if featurewise_center:
- preprocess_input = None
- if gpu_count > 1:
- print "Make the model parallel on %d GPUs" % (gpu_count)
- sys.stdout.flush()
- dl_model, org_model = make_parallel(dl_model, gpu_count)
- parallelized = True
- else:
- org_model = dl_model
- parallelized = False
-
- # Sweep the whole images and classify patches.
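- # On the cutoffs used in the calls below: score_write_patches treats
- # pred[:, 0] as the background probability. A patch is kept as an ROI
- # candidate when pred[:, 0] < 1 - roi_cutoff, i.e. P(ROI) > roi_cutoff,
- # and kept as a background sample when pred[:, 0] falls inside the
- # bkg_cutoff interval.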
- print "Score image patches and write them to:", train_out - sys.stdout.flush() - nb_roi_train, nb_bkg_train = score_write_patches( - img_train, ilab_train, img_height, img_scale, - patch_size, stride, dl_model, batch_size, - neg_out=os.path.join(train_out, neg_name), - pos_out=os.path.join(train_out, pos_name), - bkg_out=os.path.join(train_out, bkg_name), - preprocess=preprocess_input, equalize_hist=equalize_hist, - featurewise_center=featurewise_center, featurewise_mean=featurewise_mean, - roi_cutoff=roi_cutoff, bkg_cutoff=bkg_cutoff, - sample_bkg=sample_bkg, img_ext=out_img_ext, random_seed=random_seed, - parallelized=parallelized) - print "Wrote %d ROI and %d bkg patches" % (nb_roi_train, nb_bkg_train) - #### - print "Score image patches and write them to:", val_out - sys.stdout.flush() - nb_roi_val, nb_bkg_val = score_write_patches( - img_val, ilab_val, img_height, img_scale, - patch_size, stride, dl_model, batch_size, - neg_out=os.path.join(val_out, neg_name), - pos_out=os.path.join(val_out, pos_name), - bkg_out=os.path.join(val_out, bkg_name), - preprocess=preprocess_input, equalize_hist=equalize_hist, - featurewise_center=featurewise_center, featurewise_mean=featurewise_mean, - roi_cutoff=roi_cutoff, bkg_cutoff=bkg_cutoff, - sample_bkg=sample_bkg, img_ext=out_img_ext, random_seed=random_seed, - parallelized=parallelized) - print "Wrote %d ROI and %d bkg patches" % (nb_roi_val, nb_bkg_val) - #### - print "Score image patches and write them to:", test_out - sys.stdout.flush() - nb_roi_test, nb_bkg_test = score_write_patches( - img_test, ilab_test, img_height, img_scale, - patch_size, stride, dl_model, batch_size, - neg_out=os.path.join(test_out, neg_name), - pos_out=os.path.join(test_out, pos_name), - bkg_out=os.path.join(test_out, bkg_name), - preprocess=preprocess_input, equalize_hist=equalize_hist, - featurewise_center=featurewise_center, featurewise_mean=featurewise_mean, - roi_cutoff=roi_cutoff, bkg_cutoff=bkg_cutoff, - sample_bkg=sample_bkg, img_ext=out_img_ext, random_seed=random_seed, - parallelized=parallelized) - print "Wrote %d ROI and %d bkg patches" % (nb_roi_test, nb_bkg_test) - sys.stdout.flush() - - # ==== Image generators ==== # - if featurewise_center: - train_imgen = DMImageDataGenerator(featurewise_center=True) - val_imgen = DMImageDataGenerator(featurewise_center=True) - test_imgen = DMImageDataGenerator(featurewise_center=True) - train_imgen.mean = featurewise_mean - val_imgen.mean = featurewise_mean - test_imgen.mean = featurewise_mean - else: - train_imgen = DMImageDataGenerator() - val_imgen = DMImageDataGenerator() - test_imgen = DMImageDataGenerator() - if augmentation: - train_imgen.horizontal_flip=True - train_imgen.vertical_flip=True - train_imgen.rotation_range = 45. - train_imgen.shear_range = np.pi/8. - - # ==== Train & val set ==== # - # Note: the images are histogram equalized before they were written to - # external folders. 
- train_bs = int(batch_size*train_bs_multiplier) - if load_train_ram: - raw_imgen = DMImageDataGenerator() - print "Create generator for raw train set" - raw_generator = raw_imgen.flow_from_directory( - train_out, target_size=(patch_size, patch_size), - target_scale=img_scale, equalize_hist=False, dup_3_channels=True, - classes=[bkg_name, pos_name, neg_name], class_mode='categorical', - batch_size=train_bs, shuffle=False) - print "Loading raw train set into RAM.", - sys.stdout.flush() - raw_set = load_dat_ram(raw_generator, raw_generator.nb_sample) - print "Done."; sys.stdout.flush() - print "Create generator for train set" - train_generator = train_imgen.flow( - raw_set[0], raw_set[1], batch_size=train_bs, - auto_batch_balance=True, preprocess=preprocess_input, - shuffle=True, seed=random_seed) - else: - print "Create generator for train set" - train_generator = train_imgen.flow_from_directory( - train_out, target_size=(patch_size, patch_size), - target_scale=img_scale, equalize_hist=False, dup_3_channels=True, - classes=[bkg_name, pos_name, neg_name], class_mode='categorical', - auto_batch_balance=True, batch_size=train_bs, - preprocess=preprocess_input, shuffle=True, seed=random_seed) - - print "Create generator for val set" - sys.stdout.flush() - validation_set = val_imgen.flow_from_directory( - val_out, target_size=(patch_size, patch_size), target_scale=img_scale, - equalize_hist=False, dup_3_channels=True, - classes=[bkg_name, pos_name, neg_name], - class_mode='categorical', batch_size=batch_size, - preprocess=preprocess_input, shuffle=False) - val_samples = validation_set.nb_sample - if parallelized and val_samples % batch_size != 0: - val_samples -= val_samples % batch_size - print "Validation samples =", val_samples; sys.stdout.flush() - if load_val_ram: - print "Loading validation set into RAM.", - sys.stdout.flush() - validation_set = load_dat_ram(validation_set, val_samples) - print "Done." - print "Loaded %d val samples" % (len(validation_set[0])) - sys.stdout.flush() - - # ==== Model finetuning ==== # - train_batches = int(train_generator.nb_sample/train_bs) + 1 - samples_per_epoch = train_bs*train_batches - # import pdb; pdb.set_trace() - dl_model, loss_hist, acc_hist = do_3stage_training( - dl_model, org_model, train_generator, validation_set, val_samples, - best_model, samples_per_epoch, top_layer_nb, net, nb_epoch=nb_epoch, - top_layer_epochs=top_layer_epochs, all_layer_epochs=all_layer_epochs, - use_pretrained=True, optim=optim, init_lr=init_lr, - top_layer_multiplier=top_layer_multiplier, - all_layer_multiplier=all_layer_multiplier, - es_patience=es_patience, lr_patience=lr_patience, - auto_batch_balance=True, nb_worker=nb_worker, - weight_decay2=weight_decay2, bias_multiplier=bias_multiplier, - hidden_dropout2=hidden_dropout2) - - # Training report. 
- min_loss_locs, = np.where(loss_hist == min(loss_hist)) - best_val_loss = loss_hist[min_loss_locs[0]] - best_val_accuracy = acc_hist[min_loss_locs[0]] - print "\n==== Training summary ====" - print "Minimum val loss achieved at epoch:", min_loss_locs[0] + 1 - print "Best val loss:", best_val_loss - print "Best val accuracy:", best_val_accuracy - - # ==== Predict on test set ==== # - print "\n==== Predicting on test set ====" - print "Create generator for test set" - test_generator = test_imgen.flow_from_directory( - test_out, target_size=(patch_size, patch_size), target_scale=img_scale, - equalize_hist=False, dup_3_channels=True, - classes=[bkg_name, pos_name, neg_name], - class_mode='categorical', batch_size=batch_size, - preprocess=preprocess_input, shuffle=False) - test_samples = test_generator.nb_sample - if parallelized and test_samples % batch_size != 0: - test_samples -= test_samples % batch_size - print "Test samples =", test_samples - print "Load saved best model:", best_model + '.', - sys.stdout.flush() - org_model.load_weights(best_model) - print "Done." - test_res = dl_model.evaluate_generator( - test_generator, test_samples, nb_worker=nb_worker, - pickle_safe=True if nb_worker > 1 else False) - print "Evaluation result on test set:", test_res - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description="DM DL model finetuning") - parser.add_argument("img_folder", type=str) - parser.add_argument("dl_state", type=str) - parser.add_argument("best_model", type=str) - parser.add_argument("--img-extension", "-ext", dest="img_extension", type=str, default="dcm") - parser.add_argument("--img-height", "-ih", dest="img_height", type=int, default=1024) - parser.add_argument("--img-scale", "-ic", dest="img_scale", type=float, default=255.) - parser.add_argument("--equalize-hist", dest="equalize_hist", action="store_true") - parser.add_argument("--no-equalize-hist", dest="equalize_hist", action="store_false") - parser.set_defaults(equalize_hist=False) - parser.add_argument("--featurewise-center", dest="featurewise_center", action="store_true") - parser.add_argument("--no-featurewise-center", dest="featurewise_center", action="store_false") - parser.set_defaults(featurewise_center=False) - parser.add_argument("--featurewise-mean", dest="featurewise_mean", type=float, default=91.6) - parser.add_argument("--neg-vs-pos-ratio", dest="neg_vs_pos_ratio", type=float, default=10.) 
- parser.add_argument("--no-neg-vs-pos-ratio", dest="neg_vs_pos_ratio", - action="store_const", const=None) - parser.add_argument("--test-size", dest="test_size", type=float, default=.15) - parser.add_argument("--val-size", dest="val_size", type=float, default=.1) - parser.add_argument("--net", dest="net", type=str, default="vgg19") - parser.add_argument("--batch-size", dest="batch_size", type=int, default=128) - parser.add_argument("--train-bs-multiplier", dest="train_bs_multiplier", type=float, default=.5) - parser.add_argument("--patch-size", dest="patch_size", type=int, default=256) - parser.add_argument("--stride", dest="stride", type=int, default=8) - parser.add_argument("--roi-cutoff", dest="roi_cutoff", type=float, default=.9) - parser.add_argument("--bkg-cutoff", dest="bkg_cutoff", nargs=2, type=float, default=[.5, 1.]) - parser.add_argument("--sample-bkg", dest="sample_bkg", action="store_true") - parser.add_argument("--no-sample-bkg", dest="sample_bkg", action="store_false") - parser.set_defaults(sample_bkg=True) - parser.add_argument("--train-out", dest="train_out", type=str, default="./scratch/train") - parser.add_argument("--val-out", dest="val_out", type=str, default="./scratch/val") - parser.add_argument("--test-out", dest="test_out", type=str, default="./scratch/test") - parser.add_argument("--out-img-ext", dest="out_img_ext", type=str, default='png') - parser.add_argument("--neg-name", dest="neg_name", type=str, default="benign") - parser.add_argument("--pos-name", dest="pos_name", type=str, default="malignant") - parser.add_argument("--bkg-name", dest="bkg_name", type=str, default="background") - parser.add_argument("--augmentation", dest="augmentation", action="store_true") - parser.add_argument("--no-augmentation", dest="augmentation", action="store_false") - parser.set_defaults(augmentation=True) - parser.add_argument("--load-train-ram", dest="load_train_ram", action="store_true") - parser.add_argument("--no-load-train-ram", dest="load_train_ram", action="store_false") - parser.set_defaults(load_train_ram=False) - parser.add_argument("--load-val-ram", dest="load_val_ram", action="store_true") - parser.add_argument("--no-load-val-ram", dest="load_val_ram", action="store_false") - parser.set_defaults(load_val_ram=False) - parser.add_argument("--top-layer-nb", dest="top_layer_nb", type=int, default=None) - parser.add_argument("--no-top-layer-nb", dest="top_layer_nb", action="store_const", const=None) - parser.add_argument("--nb-epoch", dest="nb_epoch", type=int, default=10) - parser.add_argument("--top-layer-epochs", dest="top_layer_epochs", type=int, default=0) - parser.add_argument("--all-layer-epochs", dest="all_layer_epochs", type=int, default=0) - parser.add_argument("--optim", dest="optim", type=str, default="sgd") - parser.add_argument("--init-lr", dest="init_lr", type=float, default=.01) - parser.add_argument("--top-layer-multiplier", dest="top_layer_multiplier", type=float, default=.01) - parser.add_argument("--all-layer-multiplier", dest="all_layer_multiplier", type=float, default=.0001) - parser.add_argument("--es-patience", dest="es_patience", type=int, default=5) - parser.add_argument("--lr-patience", dest="lr_patience", type=int, default=2) - parser.add_argument("--weight-decay2", dest="weight_decay2", type=float, default=.01) - parser.add_argument("--bias-multiplier", dest="bias_multiplier", type=float, default=.1) - parser.add_argument("--hidden-dropout2", dest="hidden_dropout2", type=float, default=.0) - parser.add_argument("--exam-tsv", "-et", 
dest="exam_tsv", type=str, - default="./metadata/exams_metadata.tsv") - parser.add_argument("--no-exam-tsv", dest="exam_tsv", action="store_const", const=None) - parser.add_argument("--img-tsv", "-it", dest="img_tsv", type=str, - default="./metadata/images_crosswalk.tsv") - parser.add_argument("--out", dest="out", type=str, - default="./modelState/subj_lists.pkl") - - args = parser.parse_args() - run_opts = dict( - img_extension=args.img_extension, - img_height=args.img_height, - img_scale=args.img_scale, - equalize_hist=args.equalize_hist, - featurewise_center=args.featurewise_center, - featurewise_mean=args.featurewise_mean, - neg_vs_pos_ratio=args.neg_vs_pos_ratio, - test_size=args.test_size, - val_size=args.val_size, - net=args.net, - batch_size=args.batch_size, - train_bs_multiplier=args.train_bs_multiplier, - patch_size=args.patch_size, - stride=args.stride, - roi_cutoff=args.roi_cutoff, - bkg_cutoff=args.bkg_cutoff, - sample_bkg=args.sample_bkg, - train_out=args.train_out, - val_out=args.val_out, - test_out=args.test_out, - out_img_ext=args.out_img_ext, - neg_name=args.neg_name, - pos_name=args.pos_name, - bkg_name=args.bkg_name, - augmentation=args.augmentation, - load_train_ram=args.load_train_ram, - load_val_ram=args.load_val_ram, - top_layer_nb=args.top_layer_nb, - nb_epoch=args.nb_epoch, - top_layer_epochs=args.top_layer_epochs, - all_layer_epochs=args.all_layer_epochs, - optim=args.optim, - init_lr=args.init_lr, - top_layer_multiplier=args.top_layer_multiplier, - all_layer_multiplier=args.all_layer_multiplier, - es_patience=args.es_patience, - lr_patience=args.lr_patience, - weight_decay2=args.weight_decay2, - bias_multiplier=args.bias_multiplier, - hidden_dropout2=args.hidden_dropout2, - exam_tsv=args.exam_tsv, - img_tsv=args.img_tsv, - out=args.out, - ) - print "\n" - print "img_folder=%s" % (args.img_folder) - print "dl_state=%s" % (args.dl_state) - print "best_model=%s" % (args.best_model) - print "\n>>> Model training options: <<<\n", run_opts, "\n" - run(args.img_folder, args.dl_state, args.best_model, **run_opts) - - - - - - - - - - - - - - - diff --git a/training/dm_resnet_train.py b/training/dm_resnet_train.py deleted file mode 100644 index b26718e..0000000 --- a/training/dm_resnet_train.py +++ /dev/null @@ -1,333 +0,0 @@ -from sklearn.model_selection import train_test_split -from keras.callbacks import ( - ReduceLROnPlateau, - EarlyStopping, - # ModelCheckpoint -) -from keras.optimizers import SGD -from keras.models import load_model -import os, argparse -import numpy as np -from meta import DMMetaManager -from dm_image import DMImageDataGenerator -from dm_resnet import ( - ResNetBuilder, - MultiViewResNetBuilder -) -from dm_multi_gpu import make_parallel -from dm_keras_ext import DMMetrics, DMAucModelCheckpoint - -import warnings -import exceptions -warnings.filterwarnings('ignore', category=exceptions.UserWarning) - - -def run(img_folder, img_extension='dcm', - img_size=[288, 224], img_scale=4095, multi_view=False, - do_featurewise_norm=True, featurewise_mean=398.5, featurewise_std=627.8, - batch_size=16, samples_per_epoch=160, nb_epoch=20, - balance_classes=.0, all_neg_skip=0., pos_cls_weight=1.0, - nb_init_filter=64, init_filter_size=7, init_conv_stride=2, - pool_size=3, pool_stride=2, weight_decay=.0001, alpha=1., l1_ratio=.5, - inp_dropout=.0, hidden_dropout=.0, init_lr=.01, - val_size=.2, lr_patience=5, es_patience=10, - resume_from=None, net='resnet50', load_val_ram=False, - exam_tsv='./metadata/exams_metadata.tsv', - 
img_tsv='./metadata/images_crosswalk.tsv',
-        best_model='./modelState/dm_resnet_best_model.h5',
-        final_model="NOSAVE"):
-    '''Run ResNet training on mammograms using an exam or image list
-    Args:
-        featurewise_mean, featurewise_std ([float]): they were estimated from
-            1152 x 896 images. Using images of a different size gives very
-            close results. For png, mean=7772, std=12187.
-    '''
-
-    # Read some env variables.
-    random_seed = int(os.getenv('RANDOM_SEED', 12345))
-    nb_worker = int(os.getenv('NUM_CPU_CORES', 4))
-    gpu_count = int(os.getenv('NUM_GPU_DEVICES', 1))
-
-    # Setup training and validation data.
-    # Load image or exam lists and split them into train and val sets.
-    meta_man = DMMetaManager(exam_tsv=exam_tsv, img_tsv=img_tsv,
-                             img_folder=img_folder, img_extension=img_extension)
-    if multi_view:
-        exam_list = meta_man.get_flatten_exam_list()
-        exam_train, exam_val = train_test_split(
-            exam_list, test_size=val_size, random_state=random_seed,
-            stratify=meta_man.exam_labs(exam_list))
-        val_size_ = len(exam_val)*2  # L and R.
-    else:
-        img_list, lab_list = meta_man.get_flatten_img_list()
-        img_train, img_val, lab_train, lab_val = train_test_split(
-            img_list, lab_list, test_size=val_size, random_state=random_seed,
-            stratify=lab_list)
-        val_size_ = len(img_val)
-
-    # Create image generator.
-    img_gen = DMImageDataGenerator(
-        horizontal_flip=True,
-        vertical_flip=True)
-    if do_featurewise_norm:
-        img_gen.featurewise_center = True
-        img_gen.featurewise_std_normalization = True
-        img_gen.mean = featurewise_mean
-        img_gen.std = featurewise_std
-    else:
-        img_gen.samplewise_center = True
-        img_gen.samplewise_std_normalization = True
-
-    if multi_view:
-        train_generator = img_gen.flow_from_exam_list(
-            exam_train, target_size=(img_size[0], img_size[1]),
-            target_scale=img_scale,
-            batch_size=batch_size, balance_classes=balance_classes,
-            all_neg_skip=all_neg_skip, shuffle=True, seed=random_seed,
-            class_mode='binary')
-        if load_val_ram:
-            val_generator = img_gen.flow_from_exam_list(
-                exam_val, target_size=(img_size[0], img_size[1]),
-                target_scale=img_scale,
-                batch_size=val_size_, validation_mode=True,
-                class_mode='binary')
-        else:
-            val_generator = img_gen.flow_from_exam_list(
-                exam_val, target_size=(img_size[0], img_size[1]),
-                target_scale=img_scale,
-                batch_size=batch_size, validation_mode=True,
-                class_mode='binary')
-    else:
-        train_generator = img_gen.flow_from_img_list(
-            img_train, lab_train, target_size=(img_size[0], img_size[1]),
-            target_scale=img_scale,
-            batch_size=batch_size, balance_classes=balance_classes,
-            all_neg_skip=all_neg_skip, shuffle=True, seed=random_seed,
-            class_mode='binary')
-        if load_val_ram:
-            val_generator = img_gen.flow_from_img_list(
-                img_val, lab_val, target_size=(img_size[0], img_size[1]),
-                target_scale=img_scale,
-                batch_size=val_size_, validation_mode=True,
-                class_mode='binary')
-        else:
-            val_generator = img_gen.flow_from_img_list(
-                img_val, lab_val, target_size=(img_size[0], img_size[1]),
-                target_scale=img_scale,
-                batch_size=batch_size, validation_mode=True,
-                class_mode='binary')
-
-    # Load validation set into RAM.
-    if load_val_ram:
-        validation_set = next(val_generator)
-        if multi_view:
-            # Multi-view batches are ([X_view1, X_view2], y).
-            if len(validation_set[0][0]) != val_size_ \
-                    or len(validation_set[0][1]) != val_size_:
-                raise Exception('Multi-view validation set does not match '
-                                'the expected size.')
-        elif len(validation_set[0]) != val_size_:
-            raise Exception('Validation set does not match the expected size.')
-
-    # Create model.
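# [Editor's sketch, not part of the original file] The featurewise_mean and
# featurewise_std defaults above were pre-estimated offline from a sample of
# training images. A minimal way such global pixel stats can be computed
# (function name and inputs are hypothetical):
import numpy as np

def estimate_featurewise_stats(images):
    '''Estimate a global pixel mean/std from a list of 2-D image arrays.'''
    pixels = np.concatenate([img.ravel() for img in images])
    return pixels.mean(), pixels.std()

# e.g.: mean, std = estimate_featurewise_stats(sample_of_training_images)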
- if resume_from is not None: - model = load_model( - resume_from, - custom_objects={ - 'sensitivity': DMMetrics.sensitivity, - 'specificity': DMMetrics.specificity - } - ) - else: - if multi_view: - builder = MultiViewResNetBuilder - else: - builder = ResNetBuilder - if net == 'resnet18': - model = builder.build_resnet_18( - (1, img_size[0], img_size[1]), 1, nb_init_filter, init_filter_size, - init_conv_stride, pool_size, pool_stride, weight_decay, alpha, l1_ratio, - inp_dropout, hidden_dropout) - elif net == 'resnet34': - model = builder.build_resnet_34( - (1, img_size[0], img_size[1]), 1, nb_init_filter, init_filter_size, - init_conv_stride, pool_size, pool_stride, weight_decay, alpha, l1_ratio, - inp_dropout, hidden_dropout) - elif net == 'resnet50': - model = builder.build_resnet_50( - (1, img_size[0], img_size[1]), 1, nb_init_filter, init_filter_size, - init_conv_stride, pool_size, pool_stride, weight_decay, alpha, l1_ratio, - inp_dropout, hidden_dropout) - elif net == 'dmresnet14': - model = builder.build_dm_resnet_14( - (1, img_size[0], img_size[1]), 1, nb_init_filter, init_filter_size, - init_conv_stride, pool_size, pool_stride, weight_decay, alpha, l1_ratio, - inp_dropout, hidden_dropout) - elif net == 'dmresnet47rb5': - model = builder.build_dm_resnet_47rb5( - (1, img_size[0], img_size[1]), 1, nb_init_filter, init_filter_size, - init_conv_stride, pool_size, pool_stride, weight_decay, alpha, l1_ratio, - inp_dropout, hidden_dropout) - elif net == 'dmresnet56rb6': - model = builder.build_dm_resnet_56rb6( - (1, img_size[0], img_size[1]), 1, nb_init_filter, init_filter_size, - init_conv_stride, pool_size, pool_stride, weight_decay, alpha, l1_ratio, - inp_dropout, hidden_dropout) - elif net == 'dmresnet65rb7': - model = builder.build_dm_resnet_65rb7( - (1, img_size[0], img_size[1]), 1, nb_init_filter, init_filter_size, - init_conv_stride, pool_size, pool_stride, weight_decay, alpha, l1_ratio, - inp_dropout, hidden_dropout) - elif net == 'resnet101': - model = builder.build_resnet_101( - (1, img_size[0], img_size[1]), 1, nb_init_filter, init_filter_size, - init_conv_stride, pool_size, pool_stride, weight_decay, alpha, l1_ratio, - inp_dropout, hidden_dropout) - elif net == 'resnet152': - model = builder.build_resnet_152( - (1, img_size[0], img_size[1]), 1, nb_init_filter, init_filter_size, - init_conv_stride, pool_size, pool_stride, weight_decay, alpha, l1_ratio, - inp_dropout, hidden_dropout) - - if gpu_count > 1: - model = make_parallel(model, gpu_count) - - # Model training. 
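# [Editor's sketch, not part of the original file] DMAucModelCheckpoint used
# just below is repo-specific (dm_keras_ext); a minimal stand-in that saves
# the weights with the best validation AUC might look like this, assuming the
# Keras 1 callback API and an in-RAM validation set:
from keras.callbacks import Callback
from sklearn.metrics import roc_auc_score

class SimpleAucCheckpoint(Callback):
    def __init__(self, filepath, val_set):
        super(SimpleAucCheckpoint, self).__init__()
        self.filepath = filepath
        self.X_val, self.y_val = val_set
        self.best_auc = 0.

    def on_epoch_end(self, epoch, logs=None):
        # Score the held-out set and keep only the highest-AUC model so far.
        pred = self.model.predict(self.X_val, verbose=0).ravel()
        auc = roc_auc_score(self.y_val, pred)
        if auc > self.best_auc:
            self.best_auc = auc
            self.model.save(self.filepath)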
- sgd = SGD(lr=init_lr, momentum=0.9, decay=0.0, nesterov=True) - model.compile(optimizer=sgd, loss='binary_crossentropy', - metrics=[DMMetrics.sensitivity, DMMetrics.specificity]) - reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, - patience=lr_patience, verbose=1) - early_stopping = EarlyStopping(monitor='val_loss', patience=es_patience, verbose=1) - if load_val_ram: - auc_checkpointer = DMAucModelCheckpoint(best_model, validation_set, - batch_size=batch_size) - else: - auc_checkpointer = DMAucModelCheckpoint(best_model, val_generator, - nb_test_samples=val_size_) - # checkpointer = ModelCheckpoint( - # best_model, monitor='val_loss', verbose=1, save_best_only=True) - hist = model.fit_generator( - train_generator, - samples_per_epoch=samples_per_epoch, - nb_epoch=nb_epoch, - class_weight={ 0: 1.0, 1: pos_cls_weight }, - validation_data=validation_set if load_val_ram else val_generator, - nb_val_samples=val_size_, - callbacks=[reduce_lr, early_stopping, auc_checkpointer], - nb_worker=nb_worker, - pickle_safe=True, # turn on pickle_safe to avoid a strange error. - verbose=2 - ) - - # Training report. - min_loss_locs, = np.where(hist.history['val_loss'] == min(hist.history['val_loss'])) - best_val_loss = hist.history['val_loss'][min_loss_locs[0]] - best_val_sensitivity = hist.history['val_sensitivity'][min_loss_locs[0]] - best_val_specificity = hist.history['val_specificity'][min_loss_locs[0]] - print "\n==== Training summary ====" - print "Minimum val loss achieved at epoch:", min_loss_locs[0] + 1 - print "Best val loss:", best_val_loss - print "Best val sensitivity:", best_val_sensitivity - print "Best val specificity:", best_val_specificity - - if final_model != "NOSAVE": - model.save(final_model) - - return hist - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description="DM ResNet training") - parser.add_argument("img_folder", type=str) - parser.add_argument("--img-extension", "-ext", dest="img_extension", - type=str, default="dcm") - parser.add_argument("--img-size", "-is", dest="img_size", nargs=2, type=int, - default=[288, 224]) - parser.add_argument("--img-scale", "-ic", dest="img_scale", type=int, default=4095) - parser.add_argument("--multi-view", dest="multi_view", action="store_true") - parser.add_argument("--no-multi-view", dest="multi_view", action="store_false") - parser.set_defaults(multi_view=False) - parser.add_argument("--featurewise-norm", dest="do_featurewise_norm", action="store_true") - parser.add_argument("--no-featurewise-norm", dest="do_featurewise_norm", action="store_false") - parser.set_defaults(do_featurewise_norm=True) - parser.add_argument("--featurewise-mean", "-feam", dest="featurewise_mean", - type=float, default=398.5) - parser.add_argument("--featurewise-std", "-feas", dest="featurewise_std", - type=float, default=627.8) - parser.add_argument("--batch-size", "-bs", dest="batch_size", type=int, default=16) - parser.add_argument("--samples-per-epoch", "-spe", dest="samples_per_epoch", - type=int, default=160) - parser.add_argument("--nb-epoch", "-ne", dest="nb_epoch", type=int, default=20) - parser.add_argument("--balance-classes", "-bc", dest="balance_classes", type=float, default=.0) - parser.add_argument("--allneg-skip", dest="all_neg_skip", type=float, default=0.) 
- parser.add_argument("--pos-class-weight", "-pcw", dest="pos_cls_weight", type=float, default=1.0) - parser.add_argument("--nb-init-filter", "-nif", dest="nb_init_filter", type=int, default=64) - parser.add_argument("--init-filter-size", "-ifs", dest="init_filter_size", type=int, default=7) - parser.add_argument("--init-conv-stride", "-ics", dest="init_conv_stride", type=int, default=2) - parser.add_argument("--max-pooling-size", "-mps", dest="pool_size", type=int, default=3) - parser.add_argument("--max-pooling-stride", "-mpr", dest="pool_stride", type=int, default=2) - parser.add_argument("--weight-decay", "-wd", dest="weight_decay", - type=float, default=.0001) - parser.add_argument("--alpha", dest="alpha", type=float, default=1.) - parser.add_argument("--l1-ratio", dest="l1_ratio", type=float, default=.5) - parser.add_argument("--inp-dropout", "-id", dest="inp_dropout", type=float, default=.0) - parser.add_argument("--hidden-dropout", "-hd", dest="hidden_dropout", type=float, default=.0) - parser.add_argument("--init-learningrate", "-ilr", dest="init_lr", type=float, default=.01) - parser.add_argument("--val-size", "-vs", dest="val_size", type=float, default=.2) - parser.add_argument("--lr-patience", "-lrp", dest="lr_patience", type=int, default=5) - parser.add_argument("--es-patience", "-esp", dest="es_patience", type=int, default=10) - parser.add_argument("--resume-from", "-rf", dest="resume_from", type=str, default=None) - parser.add_argument("--net", dest="net", type=str, default="resnet50") - parser.add_argument("--loadval-ram", dest="load_val_ram", action="store_true") - parser.add_argument("--no-loadval-ram", dest="load_val_ram", action="store_false") - parser.set_defaults(load_val_ram=False) - parser.add_argument("--exam-tsv", "-et", dest="exam_tsv", type=str, - default="./metadata/exams_metadata.tsv") - parser.add_argument("--no-exam-tsv", dest="exam_tsv", action="store_const", const=None) - parser.add_argument("--img-tsv", "-it", dest="img_tsv", type=str, - default="./metadata/images_crosswalk.tsv") - parser.add_argument("--best-model", "-bm", dest="best_model", type=str, - default="./modelState/dm_resnet_best_model.h5") - parser.add_argument("--final-model", "-fm", dest="final_model", type=str, - default="NOSAVE") - - args = parser.parse_args() - run_opts = dict( - img_extension=args.img_extension, - img_size=args.img_size, - img_scale=args.img_scale, - multi_view=args.multi_view, - do_featurewise_norm=args.do_featurewise_norm, - featurewise_mean=args.featurewise_mean, - featurewise_std=args.featurewise_std, - batch_size=args.batch_size, - samples_per_epoch=args.samples_per_epoch, - nb_epoch=args.nb_epoch, - balance_classes=args.balance_classes, - all_neg_skip=args.all_neg_skip, - pos_cls_weight=args.pos_cls_weight, - nb_init_filter=args.nb_init_filter, - init_filter_size=args.init_filter_size, - init_conv_stride=args.init_conv_stride, - pool_size=args.pool_size, - pool_stride=args.pool_stride, - weight_decay=args.weight_decay, - alpha=args.alpha, - l1_ratio=args.l1_ratio, - inp_dropout=args.inp_dropout, - hidden_dropout=args.hidden_dropout, - init_lr=args.init_lr, - val_size=args.val_size if args.val_size < 1 else int(args.val_size), - lr_patience=args.lr_patience, - es_patience=args.es_patience, - resume_from=args.resume_from, - net=args.net, - load_val_ram=args.load_val_ram, - exam_tsv=args.exam_tsv, - img_tsv=args.img_tsv, - best_model=args.best_model, - final_model=args.final_model - ) - print "\n>>> Model training options: <<<\n", run_opts, "\n" - run(args.img_folder, 
**run_opts) - - diff --git a/training/dm_sc2_meta_clf.py b/training/dm_sc2_meta_clf.py deleted file mode 100644 index 30da577..0000000 --- a/training/dm_sc2_meta_clf.py +++ /dev/null @@ -1,49 +0,0 @@ -import pandas as pd -from meta import DMMetaManager -import pickle -from sklearn.model_selection import GridSearchCV -from sklearn.ensemble import RandomForestClassifier -from sklearn.preprocessing import Imputer -from sklearn.metrics import roc_auc_score -from sklearn.pipeline import Pipeline - -fea_all_k3_nAll_train_mean_val = pd.read_csv('m5_ftu_feaAll_k3_train_mean_pred.tsv', sep="\t") -fea_all_k3_nAll_test_mean_val = pd.read_csv('m5_ftu_feaAll_k3_test_mean_pred.tsv', sep="\t") -subj_train = fea_all_k3_nAll_train_mean_val['subjectId'].unique() -subj_test = fea_all_k3_nAll_test_mean_val['subjectId'].unique() -# => Load exam_df from an external file. -exam_df = pickle.load(open('exam_df.pkl')) -man = DMMetaManager(exam_df=exam_df) -exam_df_train = man.get_flatten_2_exam_dat(subj_train, 'm5_ftu_feaAll_k3_train_mean_pred.tsv') -exam_df_test = man.get_flatten_2_exam_dat(subj_test, 'm5_ftu_feaAll_k3_test_mean_pred.tsv') - -# Random forest. -imp = Imputer(missing_values='NaN', strategy='mean') -rf = RandomForestClassifier(n_estimators=50, class_weight='balanced', - random_state=12345, n_jobs=-1) -imp_clf = Pipeline([('imp', imp), ('clf', rf)]) - -rf_grid_param1 = { - 'clf__min_samples_split': [2, 100, 200, 300], - 'clf__max_depth': range(3, 10, 2), -} - -rf_gsearch1 = GridSearchCV( - estimator=imp_clf, - param_grid=rf_grid_param1, - scoring='roc_auc', n_jobs=1, cv=5, -) -rf_gsearch1.fit(exam_df_train[0], exam_df_train[1]) - -print rf_gsearch1.best_params_ -print '='*10 -print rf_gsearch1.best_score_ -print '='*10 -print roc_auc_score( - exam_df_test[1], rf_gsearch1.predict_proba(exam_df_test[0])[:,1]) - -pickle.dump(rf_gsearch1.best_estimator_, open('model5_ftu_based_meta_clf.pkl', 'w')) - - - - diff --git a/training/dm_xgb_train.py b/training/dm_xgb_train.py deleted file mode 100644 index 9b9d250..0000000 --- a/training/dm_xgb_train.py +++ /dev/null @@ -1,50 +0,0 @@ -# !!! This is not a standalone program. Copy&paste to Python console to run. !!! 
#
-import xgboost as xgb
-import pandas as pd
-import numpy as np
-from sklearn.model_selection import train_test_split
-import pickle
-from meta import DMMetaManager
-
-
-# =========== Load and construct training data =========== #
-# meta_man = DMMetaManager(exam_tsv=exam_tsv, img_tsv=img_tsv,
-#                          img_folder=img_folder, img_extension=img_extension)
-exam_df = pickle.load(open('exam_df.pkl'))
-meta_man = DMMetaManager(exam_df=exam_df)
-subj_list, labs_list = meta_man.get_subj_labs()
-subj_train, subj_test, labs_train, labs_test = train_test_split(
-    subj_list, labs_list, test_size=8000, stratify=labs_list, random_state=12345)
-X_train, y_train = meta_man.get_flatten_2_exam_dat(subj_train, 'predictions_max_corrected.tsv')
-X_test, y_test = meta_man.get_flatten_2_exam_dat(subj_test, 'predictions_max_corrected.tsv')
-# ============= Train xgb ============= #
-dtrain = xgb.DMatrix(X_train, y_train)
-dtest = xgb.DMatrix(X_test, y_test)
-param = {'colsample_bytree': 0.5,
-         'eta': 0.02,
-         'eval_metric': ['logloss', 'auc'],
-         'max_depth': 5,
-         'min_child_weight': 1,
-         'objective': 'binary:logistic',
-         'scale_pos_weight': 5,
-         'seed': 12345,
-         'silent': 1,
-         'subsample': 0.8}
-num_round = 500
-early_stopping_rounds = 20
-watchlist = [ (dtrain, 'train'), (dtest, 'eval') ]
-bst = xgb.train(param, dtrain, num_round, watchlist,
-                early_stopping_rounds=early_stopping_rounds)
-test_pred = bst.predict(dtest, ntree_limit=bst.best_ntree_limit)
-from sklearn.metrics import roc_auc_score
-roc_auc_score(y_test, test_pred)
-# ============ Feature importance ============== #
-xgb.plot_importance(bst, importance_type='weight')
-import matplotlib.pyplot as plt
-plt.show()
-# ============ Save model ============== #
-pickle.dump(bst, open('xgb_2017-01-25-10am/bst_model.pkl', 'w'))
-np.savez_compressed(
-    'xgb_2017-01-25-10am/xgb_param.npz',
-    early_stopping_rounds=early_stopping_rounds,
-    num_round=num_round, param=param)
diff --git a/training/finetune_patch_clf.sh b/training/finetune_patch_clf.sh
deleted file mode 100755
index 6586692..0000000
--- a/training/finetune_patch_clf.sh
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/bin/bash
-
-export PYTHONPATH=/:$PYTHONPATH
-IMG_CW_TSV="/metadata/images_crosswalk.tsv"
-EXAM_TSV="/metadata/exams_metadata.tsv"
-IMG_FOLDER="trainingData"
-IMG_EXT="dcm"
-DL_STATE="/3cls_best_model6.h5"
-BEST_MODEL="/modelState/3cls_best_model6_finetuned.h5"
-TRAIN_OUT="/scratch/train"
-VAL_OUT="/scratch/val"
-TEST_OUT="/scratch/test"
-OUT_EXT="png"
-NEG_NAME="benign"
-POS_NAME="malignant"
-BKG_NAME="background"
-OUT="/modelState/3cls_best_model6_subjs.pkl"
-
-echo -n "Start training: " && date
-echo
-
-mkdir -p $TRAIN_OUT/$NEG_NAME $TRAIN_OUT/$POS_NAME $TRAIN_OUT/$BKG_NAME
-mkdir -p $VAL_OUT/$NEG_NAME $VAL_OUT/$POS_NAME $VAL_OUT/$BKG_NAME
-mkdir -p $TEST_OUT/$NEG_NAME $TEST_OUT/$POS_NAME $TEST_OUT/$BKG_NAME
-
-python dm_patchClf_finetune.py \
-    --img-extension $IMG_EXT \
-    --img-height 4096 \
-    --img-scale 255.0 \
-    --equalize-hist \
-    --featurewise-center \
-    --featurewise-mean 91.6 \
-    --neg-vs-pos-ratio 1.0 \
-    --test-size 0.15 \
-    --val-size 0.1 \
-    --net resnet50 \
-    --batch-size 200 \
-    --train-bs-multiplier 0.5 \
-    --patch-size 256 \
-    --stride 256 \
-    --roi-cutoff 0.9 \
-    --bkg-cutoff 0.5 0.9 \
-    --sample-bkg \
-    --train-out $TRAIN_OUT \
-    --test-out $TEST_OUT \
-    --val-out $VAL_OUT \
-    --out-img-ext $OUT_EXT \
-    --neg-name $NEG_NAME \
-    --pos-name $POS_NAME \
-    --bkg-name $BKG_NAME \
-    --augmentation \
-    --load-train-ram \
-    --load-val-ram \
-    --no-top-layer-nb \
-    --nb-epoch 0 \
-    --top-layer-epochs 3 \
--all-layer-epochs 10 \ - --optim nadam \ - --init-lr 0.01 \ - --top-layer-multiplier 0.01 \ - --all-layer-multiplier 0.0001 \ - --es-patience 5 \ - --lr-patience 2 \ - --weight-decay2 0.0001 \ - --bias-multiplier 0.1 \ - --hidden-dropout2 0.0 \ - --img-tsv $IMG_CW_TSV \ - --exam-tsv $EXAM_TSV \ - --out $OUT \ - $IMG_FOLDER $DL_STATE $BEST_MODEL - -echo -echo -n "End training: " && date - - - - - - - - - - - - - diff --git a/training/finetune_patch_clf_local.sh b/training/finetune_patch_clf_local.sh deleted file mode 100755 index 1c1bb6d..0000000 --- a/training/finetune_patch_clf_local.sh +++ /dev/null @@ -1,90 +0,0 @@ -#!/bin/bash - -# export PYTHONPATH=/:$PYTHONPATH -IMG_CW_TSV="./metadata/images_crosswalk.tsv" -EXAM_TSV="./metadata/exams_metadata.tsv" -IMG_FOLDER="trainingData" -IMG_EXT="dcm" -DL_STATE="modelState/2017-04-19_patch_im4096_256/3cls_best_model5.h5" -BEST_MODEL="modelState/2017-04-19_patch_im4096_256/3cls_best_model5_pilot_finetuned.h5" -#BEST_MODEL="scratch/no_use.h5" -TRAIN_OUT="scratch/train_s128" -VAL_OUT="scratch/val_s128" -TEST_OUT="scratch/test_s128" -OUT_EXT="png" -NEG_NAME="benign" -POS_NAME="malignant" -BKG_NAME="background" -OUT="./modelState/2017-04-19_patch_im4096_256/3cls_best_model5_pilot_subjs.pkl" -#OUT="scratch/temp_subjs.pkl" - -echo -n "Start training: " && date -echo - -rm -rf $TRAIN_OUT $VAL_OUT $TEST_OUT - -mkdir -p $TRAIN_OUT/$NEG_NAME $TRAIN_OUT/$POS_NAME $TRAIN_OUT/$BKG_NAME -mkdir -p $VAL_OUT/$NEG_NAME $VAL_OUT/$POS_NAME $VAL_OUT/$BKG_NAME -mkdir -p $TEST_OUT/$NEG_NAME $TEST_OUT/$POS_NAME $TEST_OUT/$BKG_NAME - -python dm_patchClf_finetune.py \ - --img-extension $IMG_EXT \ - --img-height 4096 \ - --img-scale 255.0 \ - --equalize-hist \ - --featurewise-center \ - --featurewise-mean 91.6 \ - --no-neg-vs-pos-ratio \ - --test-size 0.15 \ - --val-size 0.1 \ - --net resnet50 \ - --batch-size 64 \ - --train-bs-multiplier 0.5 \ - --patch-size 256 \ - --stride 128 \ - --roi-cutoff 0.9 \ - --bkg-cutoff 0.5 0.9 \ - --sample-bkg \ - --train-out $TRAIN_OUT \ - --test-out $TEST_OUT \ - --val-out $VAL_OUT \ - --out-img-ext $OUT_EXT \ - --neg-name $NEG_NAME \ - --pos-name $POS_NAME \ - --bkg-name $BKG_NAME \ - --augmentation \ - --load-train-ram \ - --load-val-ram \ - --no-top-layer-nb \ - --nb-epoch 1 \ - --top-layer-epochs 5 \ - --all-layer-epochs 15 \ - --optim nadam \ - --init-lr 0.01 \ - --top-layer-multiplier 0.01 \ - --all-layer-multiplier 0.0001 \ - --es-patience 5 \ - --lr-patience 2 \ - --weight-decay2 0.0001 \ - --bias-multiplier 0.1 \ - --hidden-dropout2 0.0 \ - --img-tsv $IMG_CW_TSV \ - --exam-tsv $EXAM_TSV \ - --out $OUT \ - $IMG_FOLDER $DL_STATE $BEST_MODEL - -echo -echo -n "End training: " && date - - - - - - - - - - - - - diff --git a/training/load_dat_util.py b/training/load_dat_util.py deleted file mode 100644 index 9ceec84..0000000 --- a/training/load_dat_util.py +++ /dev/null @@ -1,12 +0,0 @@ -from meta import DMMetaManager -meta_man = DMMetaManager(img_folder='preprocessedData/png_288x224/', img_extension='png') -exam_list = meta_man.get_flatten_exam_list() -img_list = meta_man.get_flatten_img_list() -from dm_image import DMImageDataGenerator -img_gen = DMImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True) -img_gen.mean = 7772. -img_gen.std = 12187. 
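# [Editor's sketch, not part of the original file] After the two generators
# defined just below are built, one batch can be drawn to sanity-check the
# pipeline; channel-first (1, 288, 224) Theano dim ordering is assumed, as
# elsewhere in this repo:
def check_one_batch(generator):
    X_batch, y_batch = next(generator)
    print X_batch.shape, y_batch.shape  # e.g. (32, 1, 288, 224) (32,)
    return X_batch, y_batch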
-datgen_exam = img_gen.flow_from_exam_list(exam_list, target_size=(288, 224), batch_size=8, shuffle=False, seed=123) -datgen_image = img_gen.flow_from_img_list(img_list[0], img_list[1], target_size=(288, 224), batch_size=32, shuffle=False, seed=123) -import numpy as np - diff --git a/training/prob_heatmap_clf_dist.ipynb b/training/prob_heatmap_clf_dist.ipynb deleted file mode 100644 index 6174d96..0000000 --- a/training/prob_heatmap_clf_dist.ipynb +++ /dev/null @@ -1,2931 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Populating the interactive namespace from numpy and matplotlib\n" - ] - } - ], - "source": [ - "%pylab inline\n", - "import pickle\n", - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "phm_list = pickle.load(open('3cls_best_model5_finetuned_phm_s128.pkl'))\n", - "subj_lists = pickle.load(open('3cls_best_model5_subjs.pkl'))" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2148\n", - "['R', 'L']\n", - "['CC', 'MLO', 'cancer']\n", - "(23, 11, 3)\n" - ] - } - ], - "source": [ - "print len(phm_list)\n", - "print phm_list[0][2].keys()\n", - "print phm_list[0][2]['L'].keys()\n", - "print phm_list[0][2]['L']['CC'][0].shape" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1023\n", - "114\n", - "201\n" - ] - } - ], - "source": [ - "subj_train, subj_val, subj_test = subj_lists\n", - "print len(subj_train)\n", - "print len(subj_val)\n", - "print len(subj_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1818\n", - "330\n" - ] - } - ], - "source": [ - "phm_list_train = [ phm for phm in phm_list if not phm[0] in subj_test]\n", - "phm_list_test = [ phm for phm in phm_list if phm[0] in subj_test]\n", - "print len(phm_list_train)\n", - "print len(phm_list_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0 L = 0\n", - "0 R = 0\n", - "1 L = 0\n", - "1 R = 1\n", - "2 L = 1\n", - "2 R = 0\n", - "3 L = 1\n", - "3 R = 0\n", - "4 L = 0\n", - "4 R = 1\n", - "5 L = 0\n", - "5 R = 0\n", - "6 L = 0\n", - "6 R = 1\n", - "7 L = 1\n", - "7 R = 0\n", - "8 L = 0\n", - "8 R = 1\n", - "9 L = 1\n", - "9 R = 0\n" - ] - } - ], - "source": [ - "for i,phm in enumerate(phm_list[:10]):\n", - " print i, 'L =', phm[2]['L']['cancer']\n", - " print i, 'R =', phm[2]['R']['cancer']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Explore one case" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "subj = 27\n", - "exam = 2\n", - "L = 0\n", - "R = 1\n" - ] - } - ], - "source": [ - "print 'subj =', phm_list[1][0]\n", - "print 'exam =', phm_list[1][1]\n", - "print 'L =', phm_list[1][2]['L']['cancer']\n", - "print 'R =', phm_list[1][2]['R']['cancer']" - ] - }, - { - 
"cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAl0AAAFmCAYAAABX1y/YAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XuUnXV97/HPZ26MJAESAiGECBYiGmkJrjRqQeVSMVAk\naNVCLVLFE3VBldaelmO7vJVzFq0VjxYLTSWALSIqIFEjkFIoshQkpOESQiBggAwhMRdyZZjsme/5\nY55wdoY9mefZl2ff3q+1smbvZ3/38/vu8Yv7O8/l93NECAAAALXVUe8EAAAA2gFNFwAAQA5ougAA\nAHJA0wUAAJADmi4AAIAc0HQBAADkgKYLAAAgBzRdAAAAOaDpAgAAyEFXvRMAgEblrt5wz/h6p4Ea\nioEdikK/a7X/yQdOiCMPmzxm3H/37Uy3w6GhCjPKgVP+OltoRZx4edPGiDhkrDiaLgAYhXvGq+vY\ns+udBmqosGpRTfd/5GGT9Yurvjxm3MS/eTDV/gr9KZuzOnJHupNo0QwNZEq7l1/7bJo4Ti8CaBu2\n59peZXu17UvrnQ+aE3WEctF0AWgLtjslfUvSGZJmSjrP9sz6ZoVmQx2hEjRdANrFHEmrI+KZiBiQ\n9D1J8+qcE5oPdYSy0XQBaBfTJD1f9Hxtsm0vtufbXmp7aRT6c0sOTWPMOiquod+8tD3X5NDYaLoA\noEhELIiI2REx21299U4HTai4hg45aEK900EDoekC0C76JE0ven5Esg3IgjpC2Wi6ALSLByXNsP0G\n2z2SzpVU2/kC0IqoI5SNeboAtIWIKNi+WNIdkjolLYyIFXVOC02GOkIlaLoAtI2IWCxpcdr4jp5e\nHTDtjWPGbV+3Ot34LTQZZGdPuuvdBgda72aELHX0Su9Benrm2Dc3Dg7cW2laDSNtnbfjJKqcXgQA\nAMgBTRcAAEAOaLoAAAByQNMFAACQA5ouAACAHNB0AQAA5ICmCwAAIAc0XQAAADmg6QIAAMgBM9ID\nwCiGBvq1re/J3MedOfeDqWMfv/2HNcxkdEOFgbqM22weX71WJ/zB58aMa6VZ19N6y5np6vyxn3y/\nxpnkhyNdAAAAOaDpAgAAyAFNFwAAQA5ougAAAHJA0wUAAJADmi4AAIAc0HQBAADkgKYLAAAgBw05\nOaq7esM94zO8wdn239GZMSMphgYzviEyj9FOYmCHotCf7X+4DCZOOjgOn/761PFrfrMz0/77t2/P\nmpJiqJDxDdTQvtS6hgCg2hqz6eoZr65jz04f35HtgF3P+ElZU9LAjs2Z4ttxduEsCqsW1XT/h09/\nvW66/Z7U8R/7lwcy7X/Vf92dMaPsNTRU2J15jHZS6xqqp3rNMp9F2v+P6+jqThXXsvUekep3lfZ7\nrJW+W9LONN9Kv5uKTi/anmt7le3Vti8t8fp+tm9KXn/A9lGVjIfWRB0BANpB2U2X7U5J35J0hqSZ\nks6zPXNE2IWStkTEMZK+Lunvyx0PrYk6AgC0i0qOdM2RtDoinomIAUnfkzRvRMw8Sdcnj38o6TQ7\n4wVYaHXUEQCgLVTSdE2T9HzR87XJtpIxEVGQtFXSwRWMidZDHQEA2kLDTBlhe77tpbaXRqG/3umg\nCRXX0JZNm+qdDgAAe6mk6eqTNL3o+RHJtpIxtrskHSip5LdhRCyIiNkRMdtdvRWkhSZTtToqrqGJ\nB3MgDADQWCppuh6UNMP2G2z3SDpX0sh7uBdJuiB5/EFJ/xnB5EPYC3UEAGgLZc/TFREF2xdLukNS\np6SFEbHC9lckLY2IRZKukfRvtldL2qzhL1TgVdQRAKBdVDQ5akQslrR4xLYvFD3ul/ShSsZA66OO\nAADtoCFnpM9qynHvyhT/4iP3ZB4j66z3+x0wOVP8K9s2ZorHvj3xzIv6vT++PHX8D9fdmWn/M+67\nJ2NG0pyPX50pflvfqkzxzTAbM+pn8pvelipu4xPZVmcYS8vONF9lzfDfb8/4ianidu/amiou7Wfu\n3v/AVHEDO7akiqunhrl7EQAAoJXRdAEAAOSApgsAACAHNF0AAAA5oOkCAADIAU0XAABADmi6AAAA\nckDTBQAAkAOaLgAAgBzQdAEAAOSgJZYBAgDsW7WX90F1pV1qrp7LBdVrmZ1mWN4nrZZounZteiFT\n/Ja/+q3MY0z6xzWZ35NF1rUdm2GdrnqKoYIGdmxOHb/yhe2Z9v/kk9nXyuyZkG7dsj3uOeCpTPGn\n7npLpvjBgf5M8a3A9hpJ2yUNSipExOz6ZoRmRB2hXC3RdAFABqdEBCvMo1LUETLjmi4AAIAc0HQB\naCch6U7bD9meX+9k0LSoI5SF04sA2slJEdFn+1BJS2w/ERH3FgckX6LDX6Td4+qQIprAPuuIGsJo\nONIFoG1ERF/yc4OkWyXNKRGzICJmR8Rsd/XmnSKawFh1RA1hNDRdANqC7XG2J+x5LOl0SY/VNys0\nG+oIleD0IoB2MUXSrbal4f/v+25E3F7flNCEqCOUjaYLQFuIiGckHV/vPNDcqCNUouymy/Z0Sd/R\ncNcfkhZExDdGxJws6TZJv0423RIRXyl3TLQe6ghoLDPnfjBV3OO3/7Cq42aZILoVJ4fe9OfTU8VN\n+tqzqeLq+fvs6OpOFTdU2F2XcWsxdlqVHOkqSPpcRCxLzm8/ZHtJRDw+Iu7nEXFWBeOgtVFHAIC2\nUPaF9BGxLiKWJY+3S1opaVq1EkN7oI4AAO2iKtd02T5K0gmSSq2o+g7bD0t6QdJfRsSKaoxZbFvf\nk5niJ/5D9jEmTD06U/z2dU9nHySDVlyrsap1FJHp8PH/PPqk1LGSdMi37soUL0mHvP7wTPHvfjxb\nzXX2ZApvyRoCgEZWcdNle7ykmyVdEhHbRry8TNKREbHD9pmSfiRpxij7YTK5NlaNOqKGAACNrKJ5\numx3a/iL8oaIuGXk6xGxLSJ2JI8XS+q2PbnUvphMrn1Vq46oIQBAIyu76fLwJCXXSFoZEVeMEnNY\nEifbc5LxNpU7JloPdQQAaBeVnF48UdL5kh61vTzZ9nlJr5ekiLha0gclfdp2QdLLks6NiKhgTLQe\n6ggA0BbKbroi4j5JHiPmSklXljsGWh91BABoF6y9CAAAkAOWAQKAnKSdpqOe03NUe6b5tNp9SpK0\nM803g3rN9l6vcbPgSBcAAEAOaLoAAAByQNMFAACQA5ouAACAHNB0AQAA5IC7F1Pq6Mq2mnDP+ImZ\n4gd2bMkUn/VOn6z5
SNlzaii2Orq6U4dnvetlyzMPZ81Iy2dnW+v9ihX3ZIr/+vHvyRSfVWdP9qWV\nBgf6a5AJADQnjnQBAADkgKYLAAAgBzRdAAAAOeCaLgDISbvPuo78ZKm1tNf8NvV1vg2CI10AAAA5\noOkCAADIAU0XAABADmi6AAAAckDTBQAAkAOaLgAAgBzQdAEAAOSAebpS2vr8ypruP8s6gVL2tQJ7\nxh2YKV6SBgdezhjfOOvsve7AiXrzGR9IHf/Ij2/KtP9C/86sKWnm3W/OFL/rd8/OFH/RA7dliv/W\nCadnij9idva1HZ/9xY8zvwcAWhVHugAAAHLAkS4AQGYHHXlcqriXnn2s6mN39Y5LFVfOEel6mTD1\n6FRx7uhMFbdj/a9Tj12vmebf/pGPpop74MZ/TxWX9ncjZT9bVC0VH+myvcb2o7aX215a4nXb/qbt\n1bYfsf3WSsdEa6GGAADtoFpHuk6JiI2jvHaGpBnJv7dJuir5CRSjhgAALS2Pa7rmSfpODLtf0kG2\np+YwLloHNQQAaHrVaLpC0p22H7I9v8Tr0yQ9X/R8bbJtL7bn215qe2kUGucuOOSi6jVU2LW1RqkC\nAFCeapxePCki+mwfKmmJ7Sci4t6sO4mIBZIWSFLH/pOjCnmheVS9hsYd/kZqCADQUCo+0hURfcnP\nDZJulTRnREifpOlFz49ItgGSqCEAQHuoqOmyPc72hD2PJZ0uaeT9wYskfTS5A+3tkrZGxLpKxkXr\noIYAAO2i0tOLUyTdanvPvr4bEbfb/pQkRcTVkhZLOlPSakm7JH2swjHRWqghAEBbqKjpiohnJB1f\nYvvVRY9D0kWVjIPWRQ0BANpFW85I39nTm/k97/jIeZni77v22kzxB0w7NlN81lmed/7muUzxktQ7\n8bBM8S9veiHzGLXy8kubM6+nmMW3n/tl5vecdHK2u3J/Z0u2WTE++M7XZ4rfec8dmeJ/cWq2tSCl\n7P+tNdL6ndi3Wsw0n1YzzTSf1vZ1T1d1f2ln7ZfqNzv7/Td8J1Vc9/4HpIrbvWtbJenkgrUXAQAA\nckDTBaCl2F5oe4Ptx4q2TbK9xPZTyc+J9cwRjY86Qi3QdAFoNddJmjti26WS7oqIGZLuSp4D+3Kd\nqCNUGU0XgJaSTKy7ecTmeZKuTx5fL+mcXJNC06GOUAtteSE9gLYzpWhutxc1PFVJSclSVMPLUXWn\nvxgZbSFVHVFDGA1HugC0lWQKklGXiYqIBRExOyJmuyv7nc5oD/uqI2oIo6HpAtAO1tueKknJzw11\nzgfNiTpCRWi6ALSDRZIuSB5fIOm2OuaC5kUdoSI0XQBaiu0bJf1S0rG219q+UNLlkt5j+ylJv588\nB0ZFHaEWuJAeQEuJiNGWjzgt10TQ1KpVR93jDtS0OX8wZtzaX/00y27H1Eqz9qedaX7SMW9Nvc+X\n1jyaKq7as/VzpAsAACAHbXmka6gwkPk9WddSzKp/628yxXd0dWeKL6dbz7qWYtr1sSRpsKMzazrZ\n2HJH7f6mWLn9lczv+cQ92e5imjor/V9tkvTLH/44U/zDl307U/wTP/xupvhyZKpru3aJAEANcKQL\nAAAgBzRdAAAAOaDpAgAAyAFNFwAAQA5ougAAAHJA0wUAAJADmi4AAIActOU8XQCQSsr53mJoKIdk\nmlPP+Imp4gZ2bKn62KnmfavxfG+F/p36zRMP1HSMUrp6x6WOrfbs9WnnSEz7380B096YKm7z6mWp\n4uqp7CNdto+1vbzo3zbbl4yIOdn21qKYL1SeMloJdQQAaBdlH+mKiFWSZkmS7U5JfZJuLRH684g4\nq9xx0NqoIwBAu6jWNV2nSXo6Ip6t0v7QnqgjAEDLqtY1XedKunGU195h+2FJL0j6y4hYUaUxW0r/\nS+szxddyXcFypV0JXpJiaLDU5qrVkd2hrt7xqfPJkrskvfe3D80UL0lXZPwbZ93yuzLFX3bin2SK\nH7z7nkzxeci0RmhE7RIBgBqo+Jvbdo+ksyX9oMTLyyQdGRHHS/onST/ax37m215qe2kU+itNC02m\nGnW0Vw3tfrl2yQIAUIZqHC45Q9KyiHjNoZqI2BYRO5LHiyV1255caicRsSAiZkfEbHf1ViEtNJmK\n62ivGup+Xe0zBgAgg2o0XedplFNCtg+zh+/HtT0nGW9TFcZE66GOAAAtraJrumyPk/QeSZ8s2vYp\nSYqIqyV9UNKnbRckvSzp3AguxMDeqCMAQDuoqOmKiJ2SDh6x7eqix1dKurKSMdD6qCMAQDtovFvg\nAAAAWhDLAAHAaCJY4qdCtVjeJ63u/Q8cM2Z3R2dNc4jBgl7ZtrGmY5RS7aV9sjj4jb+bKm5jyuWR\ntvU9WUk6FdnxH5elittv8rWp4jjSBQAAkAOaLgAAgBzQdAEAAOSApgsAACAHbXkhfStcGNsKn6Gm\nOjrU9brarb34zxd/M2tG2nVKd6b4cR+6euygIjvWr8kUDwDIF0e6AAAAckDTBQAAkAOaLgAAgBzQ\ndAEAAOSgLS+kBwBUxh3p/mav500/v/fheWPG3P/rRTlkkr+OrvQ37gwVdld1bKec5f/I33tfqrhn\nf/HjStKpyL8/U6jq/jjSBQAAkAOaLgAAgBzQdAEAAOSApgsAACAHNF0AAAA5oOkCAADIQUtMGXHO\nZz6VKf6GMyZlHqNzx8ZM8b0fWpB5jGa38TPTUse++8s9NcxEisJuvbzphdTxT83Ndlv7dUccmDUl\nffXX2f7G6epNv3akJA3s2JIpvhF19Y5LHVtIOWUBADQK/l8LAAAgBzRdAAAAOUh1etH2QklnSdoQ\nEccl2yZJuknSUZLWSPpwRLzm/IbtCyT9bfL0soi4vvK00WwuWvhj3f7wUzrkgHG6/+8+KYkaAppZ\nPWeaT+utf37+mDGPFPpqm4Q71NnTO2bY4EB/VYet9izzWaS9tGPb2idTxV16+V+livvqF76ZKk6S\nhgoDqeI+8xffSL3PNNIe6bpO0twR2y6VdFdEzJB0V/J8L8mX6hclvU3SHElftD2x7GzRtP74xN/R\nzX9x3sjN1BCqzvZC2xtsP1a07Uu2+2wvT/6dWc8c0fioI9RCqqYrIu6VtHnE5nmS9hxxuF7SOSXe\n+l5JSyJic3IEY4le27yhDZx47JGaOO51IzdTQ6iF61S6Rr4eEbOSf4tzzgnN5zpRR6iySq7pmhIR\n65LHL0qaUiJmmqTni56vTbYBEjWEGhjlj0QgE+oItVCVC+kjIiRFJfuwPd/2UttLo1Ddc9tofNQQ\ncnCx7UeS00acoka5qCOUrZKma73tqZKU/NxQIqZP0vSi50ck214jIhZExOyImO2usS86REughpCX\nqyQdLWmWpHWSvjZaIM079iFVHe1dQy/nmR8aXCVN1yJJFySPL5B0W4mYOySdbnti8hfB6ck2QKKG\nkJOIWB8RgxExJOlfNXxTxmixNO8oKW0d7V1Dr7mWFW0s7ZQRN0o6WdJk22s1f
DfZ5ZK+b/tCSc9K\n+nASO1vSpyLiExGx2fbfSXow2dVXIoJz5G3o41ffovtWPadNO3bpzZ/7hjrdIVFDyIntqUXXD75f\n0mP7igdKoY5QqVRNV0S85l7/xGklYpdK+kTR84WSFpaVHVrGwk99YK/n7/7yNXpu00ubRA2hykb5\nI/Fk27M0fN3gGkmfrFuCaArUEWqhJdZe/NE3r84UPy79/GmvOujI4zLFd3R1Z4qv50R21TL5m+kn\nGSxsSDcxXV5m3J7tTLvv/GrmMXbe8meZ4j9zydRM8Qddlm3txUas0UL/zvTBo0zOOcofideUlxHa\nFXWEWmiJpgsA0Jh6Dyo1E8xr9b+0vupj/9kX3jtmzOKrbq76uHuJoarONp/2j6UsfyS968KPp4q7\n95p0Jxx2rF+TKu6GvvtTxT35/ltSxQ0e+65UcVkc8qa3p4rbeV+6/bH2IgAAQA5ougAAAHJA0wUA\nAJADmi4AAIAc0HQBAADkgKYLAAAgBzRdAAAAOaDpAgAAyAFNFwAAQA6YkR4AUDO1mGk+raNufmXM\nmMKW0stJVUvPhIN01DvnjRn3zM9vS7W/WizH9fNrr0sV19U7LlVc2uW8zj/ynanidtzymVRxXznn\n66niJClGWUZspL6lP0u9zzQ40gUAAJADjnSltH3d6kzxrbCAdSMuiNwoPrbsPzK/59Qf/Emm+Ptv\neDzzGFnE0GCm+Lec+aHMY6xY/IPM7wGAVsWRLgAAgBzQdAEAAOSApgsAACAHNF0AAAA5oOkCAADI\nAU0XAABADmi6AAAAcsA8XQBQobRz2k065q2p4jY+8UAl6bS8P7zk06nibv6/V9U4k7ENbH8p1Wzz\nR7977FnrJWnx356aKm7mH/x1qjhJGioMVDUu/bjp5nY84MNXp4pLO8t8PY3ZdNleKOksSRsi4rhk\n21clvU/SgKSnJX0sIl4q8d41krZLGpRUiIjZ1UsdzaTw3H2Kbc9LXb3qftP7JVFHAID2kub04nWS\n5o7YtkTScRHxO5KelPS/9vH+UyJiFl+U7a1j0jHq+q33jNxMHQEA2saYTVdE3Ctp84htd0ZEIXl6\nv6QjapAbWkjH+MOkzv322kYdAQDaSTWu6fq4pJtGeS0k3Wk7JP1LRCwYbSe250uaL0nqTreSebk6\ne3pruv9WkfNaihXXUXENuWe8unrT11Ghf2emZE8/NHuN3r7yxczvycId2e6LyXr9w9aNuzLFAwD2\nVlHTZftvJBUk3TBKyEkR0Wf7UElLbD+RHDl7jeSLdIEkdew/OSrJC82lWnW0Vw2NP5QaAgA0lLKn\njLD9pxq+wP4jEVHyCy4i+pKfGyTdKmlOueOhNVFHAIB2UVbTZXuupL+SdHZElDznYHuc7Ql7Hks6\nXdJj5SaK1kMdAQDayZhNl+0bJf1S0rG219q+UNKVkiZo+FTPcttXJ7GH216cvHWKpPtsPyzpV5J+\nGhG31+RToOEV1tyjwlM/lfq3aveKmxSFfok6AgC0kTGv6YqI80psvmaU2BcknZk8fkbS8RVlh5bR\nddTJez0vrFqkoYEdx5SKpY4AAK2IGekBoEJp7/RlpvnquPXKb9c7hap7+r/GnrVeko5NGVcL9Zrx\n/fBZp6SKe+7+n9Q4k8qx9iIAAEAOaLoAAAByQNMFAACQA5ouAACAHNB0AQAA5KAt714cHOivdwqo\nsd8+ULr9fenvtDnyB9n2f+7hb8uYkaRld2Z/Twa1vrOob+nParp/AGh1HOkCAADIAU0XAABADmi6\nAAAActCW13QBAEpzR7q/xXvGT0oV98q2jZWkU1LaFQDQ2NLWWjPMNJ8WR7oAAAByQNMFoKXYnm77\nbtuP215h+7PJ9km2l9h+Kvk5sd65ojFRQ6gVmi4AraYg6XMRMVPS2yVdZHumpEsl3RURMyTdlTwH\nSqGGUBM0XQBaSkSsi4hlyePtklZKmiZpnqTrk7DrJZ1TnwzR6Kgh1ApNF4CWZfsoSSdIekDSlIhY\nl7z0oqQpdUoLTYQaQjXRdAFoSbbHS7pZ0iURsa34tYgISTHK++bbXmp7aRRYvaKdUUOoNpouAC3H\ndreGvyxviIhbks3rbU9NXp8qaUOp90bEgoiYHRGz3dWbT8JoONQQaoF5ulA1aedcGQ527RKRtGJH\nt95yb5Yj/2tqlUrZMv0+Vfu1Fzt7Xpf5PYX+nZniq1FDti3pGkkrI+KKopcWSbpA0uXJz9syJYe2\nQQ2hVmi6ALSaEyWdL+lR28uTbZ/X8Bfl921fKOlZSR+uU35ofNQQaoKmC0BLiYj7JI12KPW0PHNB\nc6KGUCtjNl22F0o6S9KGiDgu2fYlSf9D0m+SsM9HxOIS750r6RuSOiV9OyIur1LeaDKFZ+/V0Nbn\n5a5edc/8Q0nUEdCI0p6mrvbyPllOLdf6VDrycejME1PFrX/s5zXOJD9pqvw6SXNLbP96RMxK/pX6\nouyU9C1JZ0iaKem8ZHI5tKGOSTPUdcx7S71EHQEA2sKYTVdE3Ctpcxn7niNpdUQ8ExEDkr6n4Ynl\n0IY6JkyVO/cr563UEQCgJVQyZcTFth+xvXCU9aemSXq+6PnaZBtQjDoCALSFcpuuqyQdLWmWpHWS\nvlZpIkwm15aqWkd71dBAtqkKAACotbKarohYHxGDETEk6V81fApopD5J04ueH5FsG22fTCbXZqpd\nR3vVUM+46icMAEAFymq69szIm3i/pMdKhD0oaYbtN9jukXSuhieWAyRRRwCA9pJmyogbJZ0sabLt\ntZK+KOlk27M0vO7UGkmfTGIP1/At/WdGRMH2xZLu0PCt/gsjYkVNPgUaXuHXd2to+zqp0K+BR2+U\n3CFJ/0AdAQDaxZhNV0ScV2LzNaPEviDpzKLniyW9ZhoAtJ+uN5yy1/PdT9ymoVe2n18qljoCALQi\nZqRHSxra/Yp2rF9T7zQq0mgTQLqjs+ZjZPrMEbVLBABqgKYLAFAzE6YenSpu+7qna5wJGk0rzTSf\nViXzdAEAACAlmi4AAIAc0HQBAADkgKYLAAAgBzRdAAAAOaDpAgAAyAFNFwAAQA5ougAAAHJA0wUA\nAJADZqQHAGTmjnR/szPTPPD/0XShalg3r7Xt3rWt3ikAQFPj9CIAAEAOaLoAAAByQNMFAACQA5ou\nAACAHNB0AQAA5ICmCwAAIAc0XQAAADmg6QIAAMjBmJOj2l4o6SxJGyLiuGTbTZKOTUIOkvRSRMwq\n8d41krZLGpRUiIjZVcobTabw3H2Kbc9LXb3qftP7JVFHaAJ2qpnXM00M3CLa8TOX45DDp+gjX/rz\nMeP++bIrU+1vqLC70pRQR2lmpL9O0pWSvrNnQ0T80Z7Htr8maes+3n9KRGwsN0G0ho5Jx8iT36TC\ncz9/dRt1BABoJ2M2XRFxr+2jSr1m25I+LOnU6qaFVtMx/jDFK9tLvkYdAQDaQaVrL75T0vqIeGqU\n10PSnbZD0r9ExIIKx0OOOrq6M8VX
cNi76nV0woypuv9nn0+dwAHzrkgdK0mDA/2Z4gEAqLTpOk/S\njft4/aSI6LN9qKQltp+IiHtLBdqeL2m+JKl7XIVpoclUpY6Ka+j10w6vTaYAAJSp7LsXbXdJ+oCk\nm0aLiYi+5OcGSbdKmrOP2AURMTsiZrurt9y00GSqWUfFNTT54Em1SBcAgLJVMmXE70t6IiLWlnrR\n9jjbE/Y8lnS6pMcqGA+tiToCALSFMZsu2zdK+qWkY22vtX1h8tK5GnFKyPbhthcnT6dIus/2w5J+\nJemnEXF79VJHMymsuUeFp34q9W/V7hU3KQqvXhNFHQEA2kKauxfPG2X7n5bY9oKkM5PHz0g6vsL8\n0CK6jjp5r+eFVYskUUcAgPbBjPQAAAA5qPTuRQBoXRENP/N61qldxsKM59XV09WhIybtP2Ycv/f2\nwJEuAC3F9nTbd9t+3PYK259Ntn/Jdp/t5cm/M+udKxoTNYRa4UgXgFZTkPS5iFiW3Pn6kO0lyWtf\nj4h/rGNuaA7UEGqCpgtAS4mIdZLWJY+3214paVp9s0IzoYZQK5xeBNCyknVjT5D0QLLpYtuP2F5o\ne2LdEkPToIZQTTRdAFqS7fGSbpZ0SURsk3SVpKMlzdLwUYyvjfK++baX2l5aNJ8c2lA1amjnS5tz\nyxeNj9OLbaJ7/wMyv2f3rm01yCQf//3UOo074//UO41c5bhAecOz3a3hL8sbIuIWSYqI9UWv/6uk\nn5R6b7Kg+gJJ6th/ctQ+WzSiatXQ9Df9NjWEV3GkC0BLsW1J10haGRFXFG2fWhT2frGcFEZBDaFW\nONIFoNWcKOl8SY/aXp5s+7yk82zPkhSS1kj6ZH3SQxOghlATNF0AWkpE3CfJJV5aXGIb8BrUEGqF\npgsAGoyiKaFSAAALF0lEQVQ70l/5EUODKeMae2b9VtX33Dpd+pn/Xe800CC4pgsAACAHNF0AAAA5\noOkCAADIAU0XAABADmi6AAAAckDTBQAAkAOaLgAAgBy05TxdE99wfOb3bPn1wzXIpHydPb2Z4pt5\nHcWy2BnnOso2h9GmS6ZnzUj3ffm2TPHnTJydKb6V11IEgFbAkS4AAIActOWRLgCoprRHVTt7Xpcq\nrtC/s5J0ADSoMf+fwvZ023fbftz2CtufTbZPsr3E9lPJz4mjvP+CJOYp2xdU+wOgOcTADhVW/0y7\nV96i3U/cqii8LIk6AgC0jzR/nhUkfS4iZkp6u6SLbM+UdKmkuyJihqS7kud7sT1J0hclvU3SHElf\nHO1LFS3OHeo8/HfV/eYPqGvGWVKhX9QRAKCdjNl0RcS6iFiWPN4uaaWkaZLmSbo+Cbte0jkl3v5e\nSUsiYnNEbJG0RNLcaiSO5uLu/eX9Jw8/7uyW3ClRRwCANpLpmi7bR0k6QdIDkqZExLrkpRclTSnx\nlmmSni96vjbZVmrf8yXNlyR1j8uSFppMvLJdikGpynW0Vw31UEMAgMaS+u5F2+Ml3SzpkojYa/6B\niAhJUUkiEbEgImZHxGx3ZZsOAc0jBnersOZuqXt/VbuO9q6hdBcsAwCQl1RNl+1uDTdcN0TELcnm\n9banJq9PlbShxFv7JBVPaHREsg1tKGJIg2v+Ux0Tf0vu3G/PZuoIANAW0ty9aEnXSFoZEVcUvbRI\n0p67yC6QVGrmxzsknW57YnLh8+nJNrSZiNDgc/fJ+x2kzkOPK36JOgIAtIU0R7pOlHS+pFNtL0/+\nnSnpcknvsf2UpN9Pnsv2bNvflqSI2Czp7yQ9mPz7SrINbSZ2blBseVpDO9Zp9xO3KfpfEnUEAGgn\nY15IHxH3SfIoL59WIn6ppE8UPV8oaWG5CaI1dIyfoo5ZH3v1eWHVIg3t2rg4eUodAQBaXlvOSN9o\n6yhK6We03mNwoL9GmbSGzv3216Rj3po6/h/++n2Z9n/whV/MmpJ0YPp8JEkZ14NE/aRdu5OZ5oH2\nxtqLAAAAOaDpAgAAyAFNFwAAQA5ougAAAHJA0wUAAJADmi4AAIAc0HQBAADkgKYLAAAgBzRdAAAA\nOaDpAgAAyEFbLgMEAEAjSbsUXNolp9CYGrLpipc3bdy9/NpnS7w0WdLGvPNpw3HzGPvIGu5bhS3P\nbXzx+xelrqGPfv+iWqazz7EZt2w1rSEAqLbGbLoiDim13fbSiJiddz7tNm69x66GRquheo7dbuMC\nQKPimi4AAIAc0HQBAADkoNmargWM2xZj11I7/k7bbVwAaEiOiHrnAAANqWP/ydF17Nn1TgM1VFi1\nSEO7NrpW+09bQ9y92Nx2L7/2oTTXsDbbkS4AAICmRNMFAACQg4ZrumzPtb3K9mrbl5Z4fT/bNyWv\nP2D7qCqNO9323bYft73C9mdLxJxse6vt5cm/L1Rp7DW2H032ubTE67b9zeQzP2L7rVUY89iiz7Hc\n9jbbl4yIqcnnzUM96qieNZTsmzoCgAbWUPN02e6U9C1J75G0VtKDthdFxONFYRdK2hIRx9g+V9Lf\nS/qjKgxfkPS5iFhme4Kkh2wvGTG2JP08Is6qwngjnRIRo00keYakGcm/t0m6KvlZtohYJWmW9Orv\nvU/SrSVCa/V5a6aOdVTvGpKoo7ro7OlNFTc40F/jTNCsuFarPTTaka45klZHxDMRMSDpe5LmjYiZ\nJ+n65PEPJZ1mu+KLICNiXUQsSx5vl7RS0rRK91sl8yR9J4bdL+kg21OruP/TJD0dEaVmcG9Gdamj\nBq8hqU3qyHav7V/Zfjg54vjlZPsbkqOaq5OjnD31zBONjTpCLTRa0zVN0vNFz9fqtV9ar8ZEREHS\nVkkHVzOJ5FTTCZIeKPHyO5L/CH9m+y1VGjIk3Wn7IdvzS7ye5vdSiXMl3TjKa7X4vLVW9zqqQw1J\n1NEer0g6NSKO1/BRuLm2367ho5lfj4hjJG3R8NFOYDTUEaqu0ZquurM9XtLNki6JiG0jXl4m6cjk\nP8J/kvSjKg17UkS8VcOnfy6y/a4q7XdMyV9pZ0v6QYmXa/V5W1qdakiijiRJyZG8HcnT7uRfSDpV\nw0c1peGjnOfUMg80N+oItdBoTVefpOlFz49ItpWMsd0l6UBJm6oxuO1uDX9Z3hARt4x8PSK27fmP\nMCIWS+q2PbnScSOiL/m5QcPXw8wZEZLm91KuMyQti4j1JfKqyefNQd3qqF41lOyPOkrY7rS9XNIG\nSUskPS3ppeSoplT9o3xoQdQRqq3Rmq4HJc1Izpn3aPh0xaIRMYskXZA8/qCk/4wqzPCaXM9zjaSV\nEXHFKDGH7bnux/YcDf/+Kvqitj0uuehatsdJOl3SYyPCFkn6aHL32dslbY2IdZWMW+Q8jXJKqBaf\nNyd1qaN61VCyL+qoSEQMRsQsDTeWcyS9Ke17bc+3vdT20ihw4Xs7K7eOqCGMpqHuXoyIgu2LJd0h\nqVPSwohYYfsrkpZGxCINf6n9m+3VkjZr+Au1Gk6UdL6kR5O/bCTp85Jen+R2tYa/nD9tuyDpZ
Unn\nVqHhmyLp1uQ7qUvSdyPidtufKhp3saQzJa2WtEvSxyocU9KrX87vkfTJom3F49bi89ZcHeuoXjUk\nUUclRcRLtu+W9A4N3zjQlRylGPUoX0QsULKEUcf+kxu+3lF7WeuIGsJoWAYIQEuxfYik3ckX5esk\n3anhi58vkHRzRHzP9tWSHomIf97XvtIu4cKUEc1rtGWAqlVHLCXVHtIuA9RQR7oAoAqmSrrew/OG\ndUj6fkT8xPbjkr5n+zJJ/63ho53AaKgjVB1NF4CWEhGPaHi6jpHbn9Frby4ASqKOUAs0XQBQoRga\nrHcKaFS23DH2PWvMSN8eGu3uRQAAgJZE0wUAAJADmi4AAIAc0HQBAADkgKYLAAAgBzRdAAAAOaDp\nAgAAyAFNFwAAQA5ougAAAHLAgtcAMArbv5H07IjNkyVtrEM61dYqn0Oq7LMcGRGHVDOZYi1eQ1Lr\nfJZKP0eqOqLpAoAMbC+NiNn1zqNSrfI5pOb7LM2W7760ymfJ63NwehEAACAHNF0AAAA5oOkCgGwW\n1DuBKmmVzyE132dptnz3pVU+Sy6fg2u6AAAAcsCRLgAAgBzQdAFACrbn2l5le7XtS+udTyVsr7H9\nqO3ltpfWO58sbC+0vcH2Y0XbJtleYvup5OfEeua4L61SR9RQeWi6AGAMtjslfUvSGZJmSjrP9sz6\nZlWxUyJiVhPe7n+dpLkjtl0q6a6ImCHpruR5w2nBOqKGMqLpAoCxzZG0OiKeiYgBSd+TNK/OObWl\niLhX0uYRm+dJuj55fL2kc3JNKj3qqAHUs4ZougBgbNMkPV/0fG2yrVmFpDttP2R7fr2TqYIpEbEu\nefyipCn1TGYfWqmOqKEydNVipwCAhnZSRPTZPlTSEttPJH/9N72ICNvcll971FAZONIFAGPrkzS9\n6PkRybamFBF9yc8Nkm7V8GmvZrbe9lRJSn5uqHM+o2mZOqKGykPTBQBje1DSDNtvsN0j6VxJi+qc\nU1lsj7M9Yc9jSadLemzf72p4iyRdkDy+QNJtdcxlX1qijqih8nF6EQDGEBEF2xdLukNSp6SFEbGi\nzmmVa4qkW21Lw98B342I2+ubUnq2b5R0sqTJttdK+qKkyyV93/aFkp6V9OH6ZTi6FqojaqjcsZmR\nHgAAoPY4vQgAAJADmi4AAIAc0HQBAADkgKYLAAAgBzRdAAAAOaDpAgAAyAFNFwAAQA5ougAAAHLw\n/wDhsaPVuBWdlwAAAABJRU5ErkJggg==\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "l_cc_phm = phm_list[1][2]['L']['CC'][0]\n", - "l_mlo_phm = phm_list[1][2]['L']['MLO'][0]\n", - "fig,ax = subplots(1, 4)\n", - "fig.set_size_inches([10, 8])\n", - "ax[0].imshow(l_cc_phm[:,:,1], cmap='RdBu_r', vmin=0, vmax=1)\n", - "ax[1].imshow(l_cc_phm[:,:,2], cmap='RdBu_r', vmin=0, vmax=1)\n", - "ax[2].imshow(l_mlo_phm[:,:,1], cmap='RdBu_r', vmin=0, vmax=1)\n", - "ax[3].imshow(l_mlo_phm[:,:,2], cmap='RdBu_r', vmin=0, vmax=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "collapsed": false, - "scrolled": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAlMAAAGDCAYAAAAPl5VaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xu8VfV55/Hvc24crspFCUUajSIN0wbSEkyiSSQxhmgS\nNEkTbZsyLRM609ho4mTKZNLReU0nLzUXp52YTLAS6IzR2iQMmFqVoB3iJSoqRryCCANH4ADKncM5\n+5xn/jiL5hTPgd/av7322mvvz/v14rUv52HtZ5Mn8mXt9ds/c3cBAACgPE15NwAAAFBkhCkAAIAI\nhCkAAIAIhCkAAIAIhCkAAIAIhCkAAIAIhCkAAIAIhCkAAIAIhCkAAIAILXk3AADVZi3tbm2j8m4j\nnFl4LbtaSJK8+6C81JXiDy69ep4ja2pOdWjvLWXSR5p5Hn/kQPhxAx1QSV3ee9KGCVMAGo61jVLL\ntE/k3UYwawr/EMH7+jLspDhKL63M/DXqeY7aRo1Ldeyj+3dn0keaef7UugeCa0P9WNuD6viYD0Dh\nmdlcM3vJzDaa2aK8+0ExMUcoF2EKQKGZWbOkWyR9VNJ0SVea2fR8u0LRMEeIQZgCUHSzJW10903u\n3i3pTknzcu4JxcMcoWyEKQBFN1nS1gGPtyXP/QtmttDM1prZWi91Va05FAZzhLIRpgA0BHdf7O6z\n3H2WtbTn3Q4KijnCYAhTAIquQ9KUAY/PSJ4D0mCOUDbCFICie0LSVDM7y8zaJF0hKft18ag3zBHK\nxvdMASg0dy+Z2VWS7pPULGmJuz+Xc1soGOYIMQhTAArP3e+RdE/efaDYmCOUizAFADVu0syLgmtf\ne+r+DDtBkQ0f/6bFiUM6sie7y8XSfKv5gq9+Mbj2Bze1Btc2tbQF1XU9+3dhxwt+ZQAAALwJYQoA\nACACYQoAACACYQoAACACYQoAACACYQoAACACYQoAACACYQoAACACYQoAACACYQoAACAC28kAQI3b\nvu5nebeAGmVN4edEDu/ammEn2bjt63+dyXH7Sj2BhWFb33BmCgAAIAJhCgAAIAJhCgAAIAJhCgAA\nIAJhCgAAIAJhCgAAIAJhCgAAIAJhCgAAIAJhCgAAIAJhCgAAIALbyQBADl59IHybjLM++MUMO0GR\nTZl9SXDt1sfvCa71wG1U0I8zUwAAABEIUwAAABEIUwAAABEIUwAAABEIUwAAABEIUwAAABEIUwAA\nABEIUwAAABEIUwAAABEIUwAAABHYTgYAcvDeL6/IuwXUgUdv/Ehw7eQP/DTDTrJhTeHnfPLcAocz\nUwAAABEIUwAAABEIUwAAABEIUwAAABEIUwAAABGquprPWtrd2kZV8yVRZd59UF7qsixfgzkqk6X4\nn8U9/LDNrcG1Ta1tQXW9h/ao7+jBTOcIACqlumGqbZRapn2imi+JKiu9tDLz12COypPVEuP2UycG\n1448fUpQ3e5VNwYfEwDyFvUxn5nNNbOXzGyjmS2qVFNoLMwRAKDIyg5TZtYs6RZJH5U0XdKVZja9\nUo2hMTBHAICiizkzNVvSRnff5O7dku6UNK8ybaGBMEcAgEKLuWZqsqStAx5vk3Te8UVmtlDSQklS\n68iIl0OdYo5QN5pawi/G375udYad5G/GvCuCa59ZcWeGnRRPmusbJ3/gzzLsJFxW12QeWnFtcO0p\nn/1+cG3P4f3BtSEy/2oEd1/s7rPcfZa1tGf9cqhTzBEAoFbFnJnqkDRwac4ZyXNAGswRKsLMNks6\nIKlXUsndZ+XbEYqGGUK5YsLUE5KmmtlZ6v/L7wpJv1eRrtBImCNU0hx33513Eyg0ZgiplR2m3L1k\nZldJuk9Ss6Ql7v5cxTpDQ2COAABFF/Wlne5+j6R7KtQLhpDmota+Uk+GnWSDOaqOt1/8yeDa9pHh\nM7fhkceDa898x7lBdQceGhZ8zAFc0v1m5pK+7+6LyzkIGhozhLJU9RvQASBDF7h7h5mdLmmVmb3o\n7muO/ZAVoQhwwhmSmCMMjo2OAdQFd+9IbjslLVf/d5gN/DkrQnFCJ5uh5GfMEd6EMAWg8MxspJmN\nPnZf0sWS1ufbFYqEGUIMPuYDUA8mSlpuZlL/f9d+6O735tsSCoYZQtkIUwAKz903SZqRdx8oLmYI\nMQhTAFAh3tebdws1gy1iymdNzcG1abZmyVKann/n01cG1474+DeCa5vb8ruGjWumAAAAIhCmAAAA\nIhCmAAAAIhCmAAAAInABek7SXCjXV+rO5LhpLhgsdR0KrkX1HL77K8G1U659MLh24tvODK7dceun\nwo+74K6guq6DzBuA4uDMFAAAQATCFAAAQATCFAAAQATCFAAAQATCFAAAQARW8wEACmPM5HOD6va+\nmt/WIrHGn/uu4Npdzz+SYSfhmlragmvX3nV7Jj30dncF177tffOC6ja/tiaojjNTAAAAEQhTAAAA\nEQhTAAAAEQhTAAAAEbgA/QSaWlozO3aaC+XSyGqLmJb2kWHHbCKfH89S/JnMvuL3Ux179+3fDa7d\n83L4/97DRo0Lrh35sZuCa8+YfWlQ3aEni3vxMIDGw998AAAAEQhTAAAAEQhTAAAAEQhTAAAAEQhT\nAAAAEVjNBwAn0NwWvrIwq1W6aVYW95V6MukhS2lWvO7veDmori+j/y3KlWaOdr/4i+DaWpmN9lNO\nC649mGIleRqtI8YE1257OmybmJ7DB4PqODMFAAAQgTAFAAAQgTAFAAAQgTAFAAAQgQvQT2DM5GnB\ntXu3rM+wk3BptojJ5Lh9fZm8fpG97X0fD67tOVpKdew5uy9KUb0iuPK1p+5P1UeobY//Q1Bd6dC+\nTF4fALLAmSkAAIAIhCkAAIAIhCkAAIAIhCkAAIAIhCkAAIAIrOYDgBPoK3Xn3UIht4hJwxtgFXBW\nc+R9vZkcN80WP5J0cOfmTPpIo7f7SHBtc9vwsEL3oDLOTAEAAEQgTAEAAEQgTAEAAEQgTAEAAERo\nuAvQW9pHBtfWyhYxtSD4YkSzbBupEU0trcG1m35+d3BtvV+IO3rS2UF1+zYNy7gTAKgczkwBAABE\nIEwBAABEiPqYz8w2SzogqVdSyd1nVaIpNBbmCABQZJW4ZmqOu++uwHHQ2JgjAEAh8TEfAABAhNgz\nUy7pfjNzSd9398XHF5jZQkkLJUmt4Svp0FCYI9Sst773Y8G1Wx75aXBtS/uo4Nqew/uDa+td6Irs\nUsrtULL2ry75dHDt8/cuD65tHXFKcG33wdeDa2tlZfFfvfpwcO3VZ50fXLuqJWy1/ud1KKguNkxd\n4O4dZna6pFVm9qK7rxlYkPzFuFiSmkZMCNvkBo2GOQIAFFZUdHf3juS2U9JySbMr0RQaC3OEUGa2\nxMw6zWz9gOfGmdkqM9uQ3I7Ns0fUNmYIWSg7TJnZSDMbfey+pIsl8S2XSIU5QkpLJc097rlFkla7\n+1RJq5PHwFCWihlChcWcmZoo6SEze0bS45L+wd3vrUxb
aCDMEYIlH/8ef+HHPEnLkvvLJF1W1aZQ\nKMwQslD2NVPuvknSjAr2UrbgrU4klbrCLibDvxR8MaKnu5ypqHOURq1cyFkLDmx/Jaiur+domsNO\ndPftyf0d6g/oQBrMEKI03N58AOqXu3uyKvRNWBGKECeaIYk5wuBqa+0oAKS308wmSVJy2zlYkbsv\ndvdZ7j7LWtqr2iBqXtAMScwRBkeYAlB0KyXNT+7Pl7Qix15QTMwQohCmABSGmd0h6VFJ08xsm5kt\nkHSDpA+b2QZJFyWPgUExQ8gC10wBKAx3v3KIH32oqo2gsJghZIEwBQAnsOeV5zI5bl+pO5PjFtH9\nP7opuPbiT/+HsMIaW0V7eF/4CtWmlrbg2h1/Gv79omNvKN5e8mm2iDm4+uvBtZP+4Lagun3Nm4Pq\n+JgPAAAgAmEKAAAgAmEKAAAgAmEKAAAgAhegA2XoK/Vkctzrvpluf9UvvLrs5EWJ9t//WnDtmPd+\nIVUfoYK37DHL5PUBIAucmQIAAIhAmAIAAIhAmAIAAIhAmAIAAIhAmAIAAIjAaj4AOIGDO18NrvUU\nW5j0dneV005dCt4iRlJTS2tYYY2tCO145uHg2lLXoeDasTdsKKedujT+8m8H1x7dH7a1Tl9P2DZA\nnJkCAACIQJgCAACIQJgCAACIQJgCAACIULMXoAdfZCjJmpqDa9Nc9HnOnMuCazc++H+CayXpvN/7\nXHDtYz/8X6mOjV8J3r5E0q+/+9Lg2i2P3B1c+/LP/iq4dtrFXwqulaRPvqMzuHbGLdlsEZPG6Enn\nBNXtfWVYxp0AQOVwZgoAACACYQoAACACYQoAACACYQoAACACYQoAACBCza7mA4CsNLUO0+hJZwfV\nHtj+SsbdII2+Uk9YoXu2jUgaNmaszvnwJ4NqX1r1k4y7QegWMVKKld6B2xJxZgoAACACYQoAACAC\nYQoAACACYQoAACBCzV6A3tw2PLi25/D+THpIu0VMGmwRUx03bVgTXNv6na8F116TYjuZcy+6Orh2\nzORzg2slaca6vlT1odJsw+N94T2Uuo+EFaY4JgDkjTNTAAAAEQhTAAAAEQhTAAAAEQhTAAAAEQhT\nAAAAEWp2NR8AZMX7enX0wOt5t4GCG9e5VVd855qg2slrHgg+7sLPX19mRwgVvAo5cFsizkwBAABE\nIEwBAABEIEwBAABEIEwBAABEqOoF6NbSquHjfy2o9sie1zLuBkX19vYe3TFtR1Dt/pGnBx/3s9/5\nWbktVcz+jpfzbkFSui1i0jiypyOorq+3J5PXB4AscGYKAAAgAmEKAAAgwknDlJktMbNOM1s/4Llx\nZrbKzDYkt2OzbRNFxxwBAOpVyJmppZLmHvfcIkmr3X2qpNXJY+BEloo5AgDUoZOGKXdfI+n4rwqe\nJ2lZcn+ZpMsq3BfqDHMEAKhX5a7mm+ju25P7OyRNHKrQzBZKWihJ1j6mzJdDnSprjiaNaK9Ca6hn\nM88+XY8u/1JQ7eiPXB983KxWQaI2TZgxXX/0wJqg2vO++PeZ9DBszITg2qP7d2fSAypwAbq7u6Qh\nN69x98XuPsvdZ1nriNiXQ51KM0djh7VVsTMAAE6s3DC108wmSVJy21m5ltBAmCMAQOGVG6ZWSpqf\n3J8vaUVl2kGDYY4QbIgVodebWYeZrUt+XZJnj6h9zBGyEPLVCHdIelTSNDPbZmYLJN0g6cNmtkHS\nRcljYEjMESpgqd68IlSSbnb3mcmve6rcE4pnqZgjVNhJL0B39yuH+NGH0r5Y6/CRmjT9XUG1rz58\nd/Bxueiz9lVyjnq7e7Vvy76g2jt+EbZ9iSTtOvRI2lbqljXV3vf5uvsaMzsz7z5QbMwRslB7/8UE\ngHSuMrNfJh/fDPnFr2a20MzWmtna3XuO/5YOIP0c7dnN6jj0I0wBKLLvSTpb0kxJ2yV9a6jCgStC\nJ4wfV63+UAxlzdH4CeFfS4D6RpgCUFjuvtPde929T9Ktkmbn3ROKhzlCLMIUgMI69tUaicslrR+q\nFhgKc4RY5X4DOgBUVbIi9EJJE8xsm6TrJF1oZjPV/4WvmyX9SW4NohCYI2SBMAWgEIZYEXpbOcd6\neuMOnfrxG4NqL164IPi49/3PW8tpB1VUyTnqfOY5ffctM4JqN7y+Lvi4Yy74eXAtW8T8yilT3h5c\nu2/rCxV9bT7mAwAAiECYAgAAiECYAgAAiECYAgAAiECYAgAAiFDV1Xy/ftoo/Y+r3hNUe+nPV2Tc\nDYpq5JS36Lyb/31Q7d3vuzbjborj9WvfGlw77R8nB9fuej6DPQ3dK39MAMgIZ6YAAAAiEKYAAAAi\nEKYAAAAiEKYAAAAiEKYAAAAisDcfgMbT16dS16Gg0gf/948ybgbWFP7veu/ry7CTdHYOH6Nv/Mac\noNq//tjXg4+755opwbVf+dfLgmuXzPxgcO2+v3xXcK0knfK1J1LVh0ozG2f81m8G17I3HwAAQA0h\nTAEAAEQgTAEAAEQgTAEAAESo6gXoGzdt06WfXRRUu2b5N4OP+/7Lw7YWQX14estejfp3dwfVvrzj\nmeDj/veLri63pRNKcwFlWmkuxh1/89YUx91STjsA0JA4MwUAABCBMAUAABCBMAUAABCBMAUAABCB\nMAUAABCB7WQA4AR6Du8Lrm0dMSbFcfeX005dqqUtYlJxD+69a+/O4MO2T78suPbJ684Prj3lqfDV\nzVltD5OlDWt+FlwbvMraLKiMM1MAAAARCFMAAAARCFMAAAARCFMAAAARavYC9Dmf/VreLaAOTLv4\nS8G1w8ZMCK49un93cG2ai2u/8cpDwbWS9JWzL0hVDwCoPM5MAQAARCBMAQAARCBMAQAARCBMAQAA\nRCBMAQAARKjZ1XwAUAvSrMYsdR3MsBM0irFf/r/Btevfsyu49g/f/8fBtY/d8VJwrSQ1tbQF1/Z2\ndwXXpvn/X/fBN4JrwxvwoDLOTAEAAEQgTAEAAEQgTAEAAEQgTAEAAESo2QvQ01ygltU2ICi+NBcv\n/ucnlwfX/qep7wuubT91YnDtX/zOp4JrJcn2h198mubPAgAQjjNTAAAAEU4apsxsiZl1mtn6Ac9d\nb2YdZrYu+XVJtm2i6JgjAEC9CjkztVTS3EGev9ndZya/7qlsW6hDS8UcAQDq0EnDlLuvkfR6FXpB\nHWOOAAD1KuaaqavM7JfJxzdjhyoys4VmttbM1nop/KJyNAzmCABQaOWu5vuepP8qyZPbb0ka9Hvq\n3X2xpMWS1DRiQtj3sqNRMEdAhVlTun8js8qz9qTZFuWiA5cF1/6/VX9bTjtB0qzAr0dlnZly953u\n3uvufZJulTS7sm2hETBHAIB6UFaYMrNJAx5eLmn9ULXAUJgjpGFmU8zsQTN73syeM7Ork+fHmdkq\nM9uQ3A75cTHAHCELIV+NcIekRyVNM7NtZrZA0k1m9qyZ/VLSHElfyrhPFBxzhAooSbrW3adLerek\nL5jZdEmLJK1
296mSViePgaEwR6i4k14z5e5XDvL0bRn0gjrGHCGWu2+XtD25f8DMXpA0WdI8SRcm\nZcsk/ZOkP8+hRRQAc4Qs1Ox2MmmMP+e3g2tfe+r+DDtBkX370hQnxl5eG1x6NMWWLx/708+H9yDp\np9+9NVV9qDQXMedxAbOZnSnpnZIekzQx+QtSknZIGnT/HjNbKGmhJKl1ZOY9ovYxR6gUtpMBUChm\nNkrSjyVd4+77B/7M3V39q0PfxN0Xu/ssd59lLe1V6BS1jDlCJRGmABSGmbWq/y/A2939J8nTO48t\nZkhuO/PqD8XAHKHSCFMACsHMTP3X2b3g7t8e8KOVkuYn9+dLWlHt3lAczBGyUBfXTAFoCOdL+pyk\nZ81sXfLcVyXdIOmuZIXoFkmfyak/FANzhIojTAEoBHd/SJIN8eMPVbMXFBdzhCwQpgCgQmpha5Za\n6CFLwStNbai8VF+2Pn5P3i0U0ilT3h5Ut/fVsG8A4JopAACACIQpAACACIQpAACACIQpAACACHVx\nAXrn8w/n3QLqwN7N64Nrf/tTg201OLinfnxHcO0vHng2uFbK7mLjlvZRwbU9h/efvAgA6hhnpgAA\nACIQpgAAACIQpgAAACIQpgAAACIQpgAAACLUxWo+AEBjCF7B6p5tIzUiqxW9wdv2ZNxHVvZtfSGo\nrre7K6iOM1MAAAARCFMAAAARCFMAAAARCFMAAAAR6uIC9FLXoeDaNBfVFe2COsQJvdBQkn7wb84L\nrp2RYjuZXc8/ElybJbaIAYBwnJkCAACIQJgCAACIQJgCAACIQJgCAACIQJgCAACIUBer+QAAAEKN\nOG1KUN3BV9qC6jgzBQAAEIEwBQAAEIEwBQAAEIEwBQAAEIEL0IEyzLz0y8G1w8f/WnDtkT2vpepj\n7FkzgmvfePWZVMcO1X7qxODarr07M+kBAPLEmSkAAIAIhCkAAIAIhCkAAIAIhCkAAIAIhCkAAIAI\nrOYDgAbW0j4yuLbUdSjDTlBLvK8vVX1zW3twbW93V9p2gqRZOX1419agur5Sd1AdZ6YAAAAiEKYA\nAAAiEKYAAAAiEKYAAAAiNNwF6GkvqgMGk2aOeo8eyayPrLaIsabwf2exRQyARseZKQAAgAgnDVNm\nNsXMHjSz583sOTO7Onl+nJmtMrMNye3Y7NtFUTFHAIB6FXJmqiTpWnefLundkr5gZtMlLZK02t2n\nSlqdPAaGwhwBAOrSScOUu29396eS+wckvSBpsqR5kpYlZcskXZZVkyg+5ggAUK9SXTNlZmdKeqek\nxyRNdPftyY92SJpY0c5Qt5gjAEA9CV7NZ2ajJP1Y0jXuvt/M/vln7u5m5kP8voWSFkqSWsO3LUB9\nYo6A2pJmi5g0qzyzWjl9zpywk9evvrYmk9fH4LLaIibNzB3dtyu49iP/9vNBdT//+qNBdUFdmlmr\n+v8CvN3df5I8vdPMJiU/nySpc7Df6+6L3X2Wu8+ylvC9e1B/mCMAQD0KWc1nkm6T9IK7f3vAj1ZK\nmp/cny9pReXbQ71gjhDrBCtCrzezDjNbl/y6JO9eUbuYI2Qh5GO+8yV9TtKzZrYuee6rkm6QdJeZ\nLZC0RdJnsmkRdYI5QqxjK0KfMrPRkp40s1XJz25292/m2BuKgzlCxZ00TLn7Q5JsiB9/qLLtoF4x\nR4iVLFTYntw/YGbHVoQCwZgjZKHhtpNJoxYutkyriD3Xu+6Db+TdgiSppT38wv03/tu7gmtHX/tP\nZXQT57gVoedLusrM/lDSWvWfdXjTHzqLGHA85giVwnYyAArl+BWhkr4n6WxJM9V/xuFbg/0+FjFg\nIOYIlUSYAlAYg60Idfed7t7r7n2SbpU0O88eUfuYI1QaYQpAIQy1IvTYV2skLpe0vtq9oTiYI2SB\na6YAFMVQK0KvNLOZklzSZkl/kk97KAjmCBVHmAJQCCdYEXpPtXtBcTFHyAJhCgAQJM0K4KaW1kx6\nuOp33xFUd+M/jsjk9VFdaWYuTe3tfzAzqO4D3x8eVMc1UwAAABEIUwAAABEIUwAAABEIUwAAABG4\nAP0EirjdShF7RvnSbB9U6joUXHvKnz+cSQ8X/vEfBdX94sZHgo8JAHnjzBQAAEAEwhQAAEAEwhQA\nAEAEwhQAAEAEwhQAAEAEVvMBACqur9QTXNs2amxw7X/8i78JqjvYsSv4mKgPLe0jg2tPe/+fBdX1\nvLQ1qI4zUwAAABEIUwAAABEIUwAAABEIUwAAABGqegG6H9mzu2fdD7Yc9/QESbur2UeV1fP7G+y9\nvTXrF23AOaqb93b/U7cd/9RQ7y3zOQKASqlumHI/7fjnzGytu8+qZh/VVM/vL6/31mhzxHsDgNrG\nx3wAAAARCFMAAAARaiFMLc67gYzV8/urpfdWS71UGu8NAGpY7mHK3ev6P6b1/P5q6b3VUi+VxnsD\ngNrGdjIAgFyVug5WvNZ7S+W2g4LqK3UH1w4b86Z1TIMqNYXFpNzPTAEAABRZrmHKzOaa2UtmttHM\nFuXZS6WZ2WYze9bM1pnZ2rz7iWVmS8ys08zWD3hunJmtMrMNyW34bqWV7Y05KoBaniEAiJFbmDKz\nZkm3SPqopOmSrjSz6Xn1k5E57j6zTr5HZ6mkucc9t0jSanefKml18riqmKNCWaoanCEAiJXnmanZ\nkja6+yZ375Z0p6R5OfaDE3D3NZJeP+7peZKWJfeXSbqsqk31Y44KooZnCACi5BmmJkvaOuDxtuS5\neuGS7jezJ81sYd7NZGSiu29P7u+QNDGHHpijYquFGQKAKKzmy84F7t5hZqdLWmVmLyb/Mq9L7u5m\n5nn3UYcaZo6YIQBFleeZqQ5JUwY8PiN5ri64e0dy2ylpufo/jqo3O81skiQlt5059MAcFVstzBAA\nRMkzTD0haaqZnWVmbZKukLQyx34qxsxGmtnoY/clXSxp/Yl/VyGtlDQ/uT9f0oocemCOiq0WZggA\nouT2MZ+7l8zsKkn3SWqWtMTdn8urnwqbKGm5mUn9f8Y/dPd7820pjpndIelCSRPMbJuk6yTdIOku\nM1sgaYukz1S7L+aoOGp1hgAglrlziQKAxtI0YoK3TPtE3m0g0dTSWvFjdj//E/Ud2mUVP/AAzFFt\nSTNHbaPGBdUdWrtUvQe2n3SOuAAdAJCrvlJPcG1zW3tgZaY5CjUozRwFb0vkvUF1bCcDAAAQgTAF\nAAAQgTAFAAAQgTAFAAAQgTAFAAAQgTAFAAAQgTAFAAAQgTAFAAAQgTAFAAAQgTAFAAAQge1kAAC5\nsqbwf9e3tI8KqutOcUzUhzRz1H7KaUF13c1h+/0xbQAAABEIUwAKwczazexxM3vGzJ4zs/+SPH+W\nmT1mZhvN7O/MrC3vXlG7mCNkgTAFoCiOSvqgu8+QNFPSXDN7t6QbJd3s7udIekPSghx7RO1jjlBx\nhCkAheD9DiYPW5NfLumDkn6UPL9M0mU5tIeCYI6QBcIUgMIws2YzWyep
U9IqSa9I2uvupaRkm6TJ\nQ/zehWa21szWeqmrOg2jJjFHqDTCFIDCcPded58p6QxJsyX9Rorfu9jdZ7n7LGtpz6xH1D7mCJVG\nmAJQOO6+V9KDkt4j6VQzO/Y1L2dI6sitMRQKc4RKIUwBKAQzO83MTk3uD5f0YUkvqP8vw08nZfMl\nrcinQxQBc4Qs8KWdAIpikqRlZtas/n8I3uXuPzWz5yXdaWZ/KelpSbfl2SRqHnOEiiNMASgEd/+l\npHcO8vwm9V/3ApwUc4QsEKYAALmypubg2qP7dwfVeW/p5EWoK00t4d+z+pbpM4Pqjjw9POy1g18Z\nAAAAb0KYAgAAiECYAgAAiECYAgAAiECYAgAAiECYAgAAiECYAgAAiECYAgAAiECYAgAAiECYAgAA\niGDunncPAFBVZrZL0pZBfjRBUth+JcXTaO/tre5+WpYvOsQcNdqfc70Y6r0FzRFhCgASZrbW3Wfl\n3UcWeG/VUUu9VBrvbWh8zAcAABCBMAUAABCBMAUAv7I47wYyxHurjlrqpdJ4b0PgmikAAIAInJkC\nAACIQJgCAACIQJgC0PDMbK6ZvWRmG81sUd79VJKZbTazZ81snZmtzbufWGa2xMw6zWz9gOfGmdkq\nM9uQ3I7vpBBZAAABp0lEQVTNqTfmqACymCHCFICGZmbNkm6R9FFJ0yVdaWbT8+2q4ua4+8w6+Y6g\npZLmHvfcIkmr3X2qpNXJ46pijgplqSo8Q4QpAI1utqSN7r7J3bsl3SlpXs49YQjuvkbS68c9PU/S\nsuT+MkmXVbWpfsxRQWQxQ4QpAI1usqStAx5vS56rFy7pfjN70swW5t1MRia6+/bk/g5JE3PogTkq\ntqgZaql8PwCAGnKBu3eY2emSVpnZi8m/zOuSu7uZ8Z0/ldcwc1TODHFmCkCj65A0ZcDjM5Ln6oK7\ndyS3nZKWq//jqHqz08wmSVJy25lDD8xRsUXNEGEKQKN7QtJUMzvLzNokXSFpZc49VYSZjTSz0cfu\nS7pY0voT/65CWilpfnJ/vqQVOfTAHBVb1AzxMR+AhubuJTO7StJ9kpolLXH353Juq1ImSlpuZlL/\nf+9/6O735ttSHDO7Q9KFkiaY2TZJ10m6QdJdZrZA0hZJn6l2X8xRcWQxQ2wnAwAAEIGP+QAAACIQ\npgAAACIQpgAAACIQpgAAACIQpgAAACIQpgAAACIQpgAAACL8f9dWpbNLWXF+AAAAAElFTkSuQmCC\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "r_cc_phm = phm_list[1][2]['R']['CC'][0]\n", - "r_mlo_phm = phm_list[1][2]['R']['MLO'][0]\n", - "fig,ax = subplots(1, 4)\n", - "fig.set_size_inches([10, 8])\n", - "ax[0].imshow(r_cc_phm[:,:,1], cmap='RdBu_r', vmin=0, vmax=1)\n", - "ax[1].imshow(r_cc_phm[:,:,2], cmap='RdBu_r', vmin=0, vmax=1)\n", - "ax[2].imshow(r_mlo_phm[:,:,1], cmap='RdBu_r', vmin=0, vmax=1)\n", - "ax[3].imshow(r_mlo_phm[:,:,2], cmap='RdBu_r', vmin=0, vmax=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Explore another case" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "subj = 55\n", - "exam = 1\n", - "L = 1\n", - "R = 0\n" - ] - } - ], - "source": [ - "print 'subj =', phm_list[2][0]\n", - "print 'exam =', phm_list[2][1]\n", - "print 'L =', phm_list[2][2]['L']['cancer']\n", - "print 'R =', phm_list[2][2]['R']['cancer']" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAlMAAAD6CAYAAACFzcLnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XuUXFW17/Hf7EfoPEkg2IQQCAKiGCVgRI4PTgRBBI6I\nDxS4yFGO0SvxiHJRBtcx4HA4XhUQUBENggk+QDzIgItcNEYREEEDRozh/U4InRACnXdS3fP+0RXs\nhL1WVe9dj72rv58xMrp7z15VM52Zrll777WWubsAAACQTluzEwAAACgymikAAIAMaKYAAAAyoJkC\nAADIgGYKAAAgA5opAACADGimAAAAMqCZAgAAyIBmCgAAIIOOZicAAFmZ2VGSLpPULukH7v616Pd3\ndLmNGFO75+/oDMa8tCUy0MKxvO9OYZH34t7fuDxCKWxeKy9tjPyAX23IddQ50tu6dkx+/r7SUJ5a\nknTgHsmPJUl/eeblYKxtRFcw1r9lc+Lx9siYvk3rg7Hhpto6MraTAVBkZtYu6RFJR0haKunPkk50\n9yWhMW2jJnrHfu+vWQ4jd94tGNuw6rlgzNrCDYn3N78hiYm+GG/e2MBMkpUevln961+ouplKU0ft\nY3f1UQd9PDG2qfeFoSUsae23jwrGxnzutmBs3OTXBWPrVj4TGLNfcMzqJ/8ajA031dYRl/kAFN3B\nkh5z9yfcfbOk6yQd1+ScUDzUEVKjmQJQdJMlPTvo66XlY8BQUEdIjXumAAwLZjZL0ixJUufo5iaD\nwhpcR7bDuCZng7zgzBSAolsmacqgr3cvH9uGu89x9xnuPsM6wvf7YNgaeh11jmxYcsi3TGemGjqD\nJnKjpqRsM1+4CX9IrD08c6l/48vyLevrOoNm4s47+557TAnGl64Jz55qb4untnJFeMZM3+YN0bHR\nmVnSsKyzts4dwrHIDLjSulXq37im2jr6s6R9zWwvDbz4fUzSSUNIE5CoI2SQupkqz3y4XINmPpjZ\nzbGZDzZijNLOoOnoip+Wr/hCF5H3WTN50zW+Oxhbt3DukB4rTR3tuccU3X37guBjfum3r3oz+Yod\nR42I5vO9K24Nxl56enF0bGxmljQ862zspL2Dsa4J4TpaedsFVT+Hu5fMbLakX2mgIb/a3f9efZbZ\nveX97w3G/jBvXjDW1hGux1Bsy/re6hOro/5S8pT7okpTR95XCs7aG7/ntOC40O+SF/9wZ7XpbqNj\nZPgkRejfaW3Pk6meC8mynJl6ZeaDJJnZ1pkPwRdBIAF1hMzc/VZJ4U4YqAJ1hLSy3DNV1cwHM5tl\nZgvNbKGXmr/2CHJnyHW0ctWqhiUHAEAldb8BnZs+UQuD62iXnXdudjoAALwiSzNV1cwHoALqCABQ\naFmaqVdmPpjZCA3MfLi5NmlhGKGOAACFlvoG9EbPoCltXFevh0aC2F5jIyfsGoxt6AxPd0+Spo7+\n8vBSjZ151pCep1qxWaOx/a8kafl/HhCNTzzrD8HYxtXPR8cWdSbgzy+bFYzNvuKPwdiLHcVaT/iu\nH/4wGOscFV7YMTYzLw/728VYW3swVtR6raXeZQ8HY6GZfks+eEr4Aa/7cjD04mP3V53XVllmwOPV\nMv3GYuYDaoE6AgAUGSugAwAAZEAzBQAAkAHNFAAAQAY0UwAAABnQTAEAAGRQrPnHaJjShrXBWO+6\n8JTfvE/nriS2BEfvskeiY8d95ploPLYx7A0/im/se8Jp/xWN53XpkM98K7wcxHOLwpu6bl4frr96\nG7XLlGBs/cpng7GQvGxMXGve39ew54ptIt7MZRjaR3RpxylvSIzF/r+HNjo+6oTw8ge19q8LfxWM\nXT39sIblUQ+hZWzWrwqvB531dyhnpgAAADKgmQIAAMiAZgoAACADmikAAIAMaKYAAAAyoJkCAADI\ngKURAGCQNMsfNFJbR2cw1l/a0rA86rEkQWgJhNhzdXSNTjxeiiynUCt9Wzapd1nyUjHNXLJhsDHd\nUxOPj263xibSQMt+9Ink4+d/ITjmgPt3Tzze115dm9QyzVT7iK5grJlrH8XWRxmItwdjjfzFuL3U\na+Pk4BdI7MXmr7dcGB375qO/GIxV+uVYqc5G7rxbMPbBk8+Jjn3PrH+Lxu+56TfB2Nqep6Jj6+mp\nu25ONc77SjXOBADqh8t8AAAAGdBMAQAAZEAzBQAAkAHNFAAAQAY0UwAAABm0zGw+AMObmT0laY2k\nPkkld5/R3IyyCc0EbuYs33pLs5xA3+YNgQfzIT/WUGto4q6v0QnnfC4x9uvbnwyOSzvLNeS9n/lU\nMHbnDbclHr/nv64KP+B1P86aUtViM97TLi+xz7/fmnh8xZJRked6Ifl4lTOLc9VMdY3vDsY2vtQT\nHRublr7DuInRsZMPeEc0/sSdN0XjMZWKIc36KVuVNq5LlVPRWXtntFZKG9cGY2866ozUzztp+uHR\n+OVnxeMfOuUrqZ/7N3N+EI13jhoXjI2dtHd07Jrlj6fKKafe7e7JvxWB6lBDGDIu8wEAAGRAMwWg\nVbikX5vZfWY2a/ugmc0ys4VmttBLzVvIF7kWrSFp2zra0Lu6wekhr3J1mQ8AMninuy8zs9dImm9m\nD7n7HVuD7j5H0hxJahs1ceg31GA4iNaQtG0dvWbvN1JHkMSZKQAtwt2XlT+ukHSjpIObmxGKhhpC\nWpyZAlB4ZjZaUpu7ryl/fqSk85ucVibrrjk58fio//GjBmeSbMSYCcHY5rWNu/wVnMQzxNl8aWqo\n7YnHNOpjxyfGLt4lPHPsQ9217dHm/2BuMPaa/ZMnWP25gTP2YmL706adzdez+M606aRGMwWgFXRL\nutHMpIHfaz919+Q54UAyagip0UwBKDx3f0LSAc3OA8VFDSGLXDVTldaSSmtTb3zJkErrSN0+7tFg\nbGbvvqlyqsbqX8evUoybeVYwlvb0aBF435a61UrM8kULovEPnhyPxy6L7LjHG6JjVy65Oxrfsr43\nGBs5Ydfo2LaOzmCslReIBIBaydRMtdqKw2gO6ggAUGS1ODPFarGoBeoIAFBILI0AAACQQdYzU1tX\ni3VJ3y8vZraN8iqyAyvJdsb3msOwRR1hWJr4+rcFY2M/+fMGZjJ0sT0wh4sVI8fq0v1mJsYubWAe\nsXsbVyz5QwMzGbrY0ghSce7ZzNpMDWm1WFYdRgB1BAAorEyX+VgtFrVAHQEAiix1M2Vmo81s7NbP\nNbBa7OJaJYbhgToCABRdlst8w2a12NhaUt3T3hUde8TR8TXgfvyN7wRjYw89M55Yaxg2dRTbYuOl\np+L947Mnj43Gp/xkTTDWu+yR6FhrC7+nisWk1l7PDACqlbqZYrVY1AJ1BAAoOpZGAAAAyCBX28kA\nwHCy8I2LgrGpD21qYCZhoW2QYpetY5eHuTQc3sIp7fZNs77y+WBszgWXpXrMRukvbQ7GilRHnJkC\nAADIgGYKAAAgA5opAACADLhnKqOexXdG4z+uEH/u998O
xvZ479nRsaWN66LxmJE77xaNb1j1XOrH\nLrLQvQySNHbSPtGxLz/7YOrn3bK+Nxqf8pP4+DV3XByM3fTE+ujYjiOPDMY+tlt4uxNJOuurZ0Xj\nF55zYTQOAK2AM1MAAAAZ0EwBAABkwGU+AGiSqTfkY/mDmN0PPDTx+BN33hQc09E1JhirdEm7SNpH\njNT4Paclxi6/+wfBcSdPPqSmeeR9+YOY2BIHsdsuWBoBAACghdBMAQAAZEAzBQAAkAHNFAAAQAaF\nuQE9tkePlP4mtoGxfakfO6vd/vlzwVj7iK7Uj1vp77zp5ZXReJH2RKqlto4RwVjXjhOjY3960dei\n8WNPPCcYy/ozHXvomcFY56hx0bGvPfXrwdiYB+6Pjv3meeF10gBguChMMwUAZna1pGMlrXD3aeVj\nO0n6maSpkp6SdIK7h3fhxZDEZu2F9G3eEIw1+41aLWto55dX6ORbkt9QfO748BscPRZ/k4IB1tYe\niabbFLpeuMwHoEjmSjpqu2NnS1rg7vtKWlD+GgiZK2oINUYzBaAw3P0OSS9ud/g4SfPKn8+T9IGG\nJoVCoYZQDzRTAIqu292Xlz9/XlJ3M5NBIVFDyIR7pgC0DHd3M/OkmJnNkjRLktQ5upFpoUBiNSRt\nW0djFbunB8MJZ6YAFF2PmU2SpPLHFUnf5O5z3H2Gu8+wjvQzZdGSqqohads6GkUzhbJcnZm675eX\nBGNvOeYLqR+3v5Svu/6r1bd5YzT+lg+fFIzd998/jY7NstRE3q1d8NVgbMzh4eUJpPgspJ7Fd0bH\nHvPReLxZKu2FNnbCyGBs4r5vjo595p5l0XiDZm7dLOlUSV8rfxz69DMMd9QQMslVMwUAMWZ2raSZ\nkiaa2VJJ52rgBfB6MztN0tOSTmhehpDib2A7usKXWEsb19UjnW3UsoZWjByrb+/37uQgyx9kFjuh\n0Ow62h7NFIDCcPcTA6HDG5oICosaQj1wzxQAAEAGNFMAAAAZ0EwBAABkQDMFAACQAc0UAABABrma\nzTfjXyK7bLeoLOvw3P+L61I/b5Y1frqnvSsY63nmt6kft1YqrSUVU9T1tWJ1dMl3vhId+4XZFwRj\nBx7/0ejYTdMOjcZXPnRPNB5SqrAO2nDQPiJ5YdFK68+l0aD1wCQ1Z9p63tz4wsLE4/93aXhNuKun\nH1avdAoptiZgM1T8jWVmV5vZCjNbPOjYTmY238weLX+cUN80UXTUEQCgVVXz9m+upKO2O3a2pAXu\nvq+kBeWvgZi5oo4AAC2oYjPl7ndIenG7w8dJmlf+fJ6kD9Q4L7QY6ggA0KrS3jPV7e7Ly58/L6k7\n9I3s1I4I6ggAUHiZ7/J0d5fkkTg7taMi6ggAUFRpm6keM5skSeWPK2qXEoYR6ggAUHhpL/PdLOlU\nDey0faqkm2qWEYYT6ggIqPUSCM/P2iUY23XOymBs3OTXJR7vXfZIqjzaOjqDMe/vCxwv3pIlHV3h\n2xHmP7cm8fii/7wy/IA3XJs1pZr4/PlfDMZ+dM3twdhpn0zeR/rCcy5MlYe1tQ95TD3rqGIzZWbX\nSpopaaKZLZV0rgZe/K43s9MkPS3phFokk+Uvuufb/yUYO+K7X4iOPWvl36LxAz7y1WBs40s98cQq\nyPJ3btYvmL7S5nDQk6/UNbKOslhx+qRgrPuK+L/1Wz4c2ox+wL9+4/RgbPbUt8cTqyBWC2d89vzo\n2M5R44KxB2//fXTshlXPxROL2PXNM4Ox5x+9JfXjAkCjVWym3D30CpHcZgIJqCMAQKtimWEAAIAM\naKYAAAAyoJkCAADIIFcbHQNAsz32m28FY/u8598bmEltvfDA48HYjlPeFoy9/OyDicfTbo7cX9oS\njLWSae8LT4r6bmiz5zrM2Jt2bPK8nsW3XJ/q8X7/l/Ckk6M/GJ5IE5q192+Lfhscc9VB7wnG8lZH\nnJkCAADIoDBnpmJrk0jS6qcfDsYm3nZbdOx+R3w+VU7D1QsP3RuMlULvuHJi6jvfH42/YX74Xdf1\n18yOjl381ndF42cc8L5wsIk/tzt3WRKMveG0+M9rwvnh9Ymk+FowPYvvCMZKG9ZGHxcA8oQzUwAA\nABnQTAEAAGRAMwUAAJABzRQAAEAGhbkBHcDwZmZXSzpW0gp3n1Y+dp6kT0naeif8Oe5+a5bnycvy\nB6GlB9LuyTntnvAejFLy8gcxO07ZPxjrXRaeEJRmSnvaZRgSH6tBdbToxuuyDK+ZDWs2JR5/20mn\nBMfc+9MfBWOxv9ei6tN6xQ8PPjoY8xpv9h2byJZ1qQXOTAEoirmSjko4fom7Ty//yfQCiGFhrqgj\n1BjNFIBCcPc7JL3Y7DxQbNQR6qEwl/naR4yMxnuXPRKMff2ci2qdDgrq2T/9OhqPrYv0sU8ujo59\n6bk/RuP/8e4vB2N7HHJsdOwz99wSjccug1Ry6Ko3BWNbzoufuK98iSXlqXP3oXz3bDP7uKSFks50\n99XpnhTDHHWE1DgzBaDIrpC0t6TpkpZLujj0jWY2y8wWmtlCL9X2XgwUHnWETGimABSWu/e4e5+7\n90u6UtLBke+d4+4z3H2GdXQ1LknkHnWErGimABSWmU0a9OXxkuLXYoEE1BGyKsw9UwCGNzO7VtJM\nSRPNbKmkcyXNNLPpklzSU5I+3bQEayztEgiN8tLT4X5jvyM+GIw9PP8XQ36uWv4shlsdPf77m5KP\nNziPkP7S5mBsww3/Mxgb+aErhvxcHV1jgrHNa7PdIkczBaAQ3P3EhMNXNTwRFBp1hHrgMh8AAEAG\nhTkztWV9b+qxeT9djsbpq7CibpYVcsdGlj6QpDHdU4Oxh7+8V3TsyOOj4Uw1Hju93T4ifoPtqJ0n\nR+NrluflYgIA1A9npgAAADKgmQIAAMiAZgoAACCDwtwzBQAohjTLH6QVvM/RrGE5YEDsvtC1PU8l\nHo/d7znqI9/PmNG2si5/EMOZKQAAgAxopgAAADKgmQIAAMiAe6aAQSqtJZXFlg1rg7GRx387OrZr\nfHc0vvGlnlQ5VRLb6kGSRu28WzS+ac2LwVg9718AgEaqeGbKzK42sxVmtnjQsfPMbJmZLSr/Obq+\naaLoqCMAQKuq5szUXEnfkXTNdscvcfeLap4RWtVcUUdALnV0jQ7GShvXNTAT1Fro3zb273rG4tuD\nsUunzQzG7tnrgWBsWs+4xOPWFj6nE5vpd+hpnwzG7rjq6sTjsR0uuibsmnh87eMjgmO2eexK3+Du\nd0gKn6sHqkAdAQBaVZYb0Geb2QPlyzcTQt9kZrPMbKGZLfRSfF80DEvUEQCg0NI2U1dI2lvSdEnL\nJV0c+kZ3n+PuM9x9hnXEN03FsEMdAQAKL1Uz5e497t7n7v2SrpR0cG3TwnBAHQEAWkGqpRHMbJK7\nLy9/ebykxbHv/8fANrWPCJ9V6NvM5Zsi2Gmfg4KxVU//purHSVtHbxpvuvUD4dJ90x93D8ZCWxrU\nQuxGSklq32F
k6sfu27wh9dhKYjdl7j7jvdGxR37vi9H4/1mzJBjb5V2z44kBQEFUbKbM7FpJMyVN\nNLOlks6VNNPMpktySU9J+nQdc0QLoI4AAK2qYjPl7icmHL6qDrmghVFHQH4VefmD4EK77o1NJKfS\n/NvGlj8YtcuUYGzaPc8O+bliyx/ELLrtriGP2e2gI4OxpX/6ZeLxSgsXb8V2MgAAABnQTAEAAGRA\nMwUAAJABzRQAAEAGNFMAAAAZpFpnKj2v+s545NfqJxYFY32b6rce0lY9y3t12QXh9aw+EZnF8+03\nvbseKUmqPCtl/cqhz3TZqrRxbeqxlVhbezD2zD23RMf+YPph8ThrSQEYBhrcTAFAOmY2RdI1kro1\nsDbZHHe/zMx2kvQzSVM1sF7ZCe6+ull5bm+PQ44Nxio1q6i9WtaRtXeqa3x3YmzjSz01zFra/eBj\ngrHQtP56yPKmcKh2GDcxGOtd9kgwFloCoWfxncExHV2jE4+XKizGvBWX+QAURUnSme6+v6RDJJ1u\nZvtLOlvSAnffV9KC8tdACHWEmqOZAlAI7r7c3e8vf75G0oOSJks6TtK88rfNk/SB5mSIIqCOUA9c\n5gNQOGY2VdKBku6V1D1oj8fnNXD5JmnMLEmzJEmdyaf0MbxkrSPbYVz9k0QhcGYKQKGY2RhJN0g6\nw917B8fc3TVwH8yruPscd5/h7jOsI7zhOoaHmtRR56gGZIoioJkCUBhm1qmBF8CfuPsvyod7zGxS\nOT5J0opm5YdioI5QazRTAArBzEwDm2M/6O7fHBS6WdKp5c9PlXRTo3NDcVBHqIfG3jPlHl2Lp31E\n+NR73+aN9cgIKUTXU2rATu0rRo7VpfvNDMZ32uegYKz9mSXRx85rnaXdWb0aef07J3iHpFMk/c3M\nti52do6kr0m63sxOk/S0pBOalF+iZfeH10RDdhaaum4WGlKzOvK+LTVfAiFkQ4Oep172Pfz4xOOP\nLrgxOGZT7wupnuv5B36XeLy/tCU4JlhHVb6mcQM6gEJw97skhV4hD29kLigu6gj1wGU+AACADGim\nAAAAMqCZAgAAyIBmCgAAIANuQAeAKoVm/MRmW6adLdnW0Rl4rr7gmHrO+syr4N+5ATOLG2nVIwtr\n+njB2WsVpK2x0Ky9sZP2Do5542HvCMYevvuvwdjqJ8OxkKx1lKtmqkBTtBsitlSEJPWXNgdjw/GX\n6lYvPnZ/s1PIlf2P+nA0/uJz4enHzz9we42zAYDWw2U+AACADGimAAAAMqCZAgAAyIBmCgAAIAOa\nKQAAgAxyNZsPAPIsNEv2jMW3B8fM/9Llwdjfb/15MBbalDXtlHYU25pLDgvGxn7ht0N+vG9865xg\n7OwvXhSM1XrW/Zrljwdj8z/92WDsib9/Mxh7dsOyxOPHjjyg+sSGiP+VAAAAGXBmqsli7zJj60hJ\n8bWkOrpGR8eWNq6LJ4aW8eSf747GN6x6rkGZAEBrqnhmysymmNnvzGyJmf3dzD5fPr6Tmc03s0fL\nHyfUP10UFXUEAGhV1VzmK0k60933l3SIpNPNbH9JZ0ta4O77SlpQ/hoIoY4AAC2pYjPl7svd/f7y\n52skPShpsqTjJM0rf9s8SR+oV5IoPuoIANCqhnTPlJlNlXSgpHsldbv78nLoeUndgTGzJM2SJHXG\n7+PB8EAdAQBaSdXNlJmNkXSDpDPcvdfMXom5u5tZ4tbK7j5H0hxJahs1sbW28caQUUdoRZdOmxkO\nRpY/SKPom5jHNnBPM+2+raMzOTDod0srSLP8QcxZsy+o6eNVEppsFavncTPPCsZeuutnwdgBbz99\nSDlI0s6ve2vi8ReeXhAcM1hVSyOYWacGXgB/4u6/KB/uMbNJ5fgkSSuqekYMW9QRAKAVVTwzZQOn\nDq6S9KC7D14l62ZJp0r6WvnjTXXJsMXFuvLgO64q5G3pA+qoeSotfRB7t1b0syAA0AjVXOZ7h6RT\nJP3NzBaVj52jgRe/683sNElPSzqhPimiRVBHAICWVLGZcve7JIUuPh9e23TQqqgjAECrYjsZAACA\nDGimAAAAMmBvPgCFYGZTJF2jgbXIXNIcd7/MzM6T9ClJK8vfeo6739qcLF+t98J3BWOdr50WjI36\nyPcTjxd9UkCa5Q9i/l9pUeLxz/j6xONFraNDTv54MHb5SQcGY5+cc2/i8b/edF3mnLYXmzQ1fuqb\nEo+/+Nj9wTF7vv3YYKzn3E8HY1Pf+cHE4y89+0RwzPhJkxKPr+6sbiIYzRSAoti6JdH9ZjZW0n1m\nNr8cu8TdL2pibigO6gg1RzMFoBDKK+UvL3++xsy2bkkEVI06Qj1wz1SOWVt79I/39wf/ANUqYh1t\ntyWRJM02swfM7GozmxAYM8vMFprZQi/V9lITiok6Qq3QTAEolO23JJJ0haS9JU3XwBmHi5PGufsc\nd5/h7jOsI7ylCYYH6gi1RDMFoDCStiRy9x5373P3fklXSjq4mTki/6gj1Br3TAEohNCWRGY2qXwf\njCQdL2lxo3N77JhwbNxZd0ZGxmLYatzk1wVjN3zi84nHV3/ppMTjea6j3Q46Mhi75yfXBGNv/dm1\nwVh/aUumnIYi9lyxWXsh7zvstcHY+FVTgrFlP7w98Xh/aXNwzMvPLkk8vmXty8Exg9FMASiK0JZE\nJ5rZdA1Mc39KUnjONEAdoQ5opgAUQmRLotysBYT8o45QD9wzBQAAkAHNFAAAQAZc5qsza4v3q20d\nI4KxWm+70DLMoj/XPK+PVC/8PACgeTgzBQAAkAFnpgAgozf/KTxN+75ffjEYe9uHvhKMlTauy5RT\nK1nb82QwNueCyxKPl5b31Cudunnu/l8HY7FNhK968o5g7BNT/ilTTtvrGt8djG3qXRmMpTlDfsX5\nlwZj34/8PGq6HIR7Vd/GmSkAAIAMaKYAAAAyoJkCAADIgGYKAAAgg4begN4xcoy6p70rGF+x5A/B\n2MTXHxJ97B3GTAjGlv7pl5WTaxKWPxi6ffaarMuv/WowfsxHz0792EVdYiDPuQFAq+PMFAAAQAYs\njQAAGa1f+Www9pZjvhCMdY4aF4yFpqDXevp5JaGztY08G1rTqe4FFfsZnHjPD4OxR6adnHh85aa+\n4Jj9xoYXk546KrwkwRGXfyIYO7P/PYnHfz7vtuCY3mWPBGN5qwnOTAEAAGRAMwUAAJABzRQAAEAG\nNFMAAAAZ0EwBAABk0NDZfKUNa9Wz+M5g/Kqr/iMYO+20c+uRUlWyrD3E+j+1t/7Bh7TooHcE42uf\nXBCMjTnyvOhj8+8FABiqis2UmU2RdI2kbkkuaY67X2Zm50n6lKSt83TPcfdb65Uoio06Al5ty/re\nIcd2ffPM4JiVD90TjDVygeDYkg+xvzPS6frInHDwDf+cePiEL342OORLl34vVR5+wZJgbMSY5YnH\ndxi7U3BMkRZRrubMVEnSme5+v5mNlXSfmc0vxy5x94vqlx5aCHUEAGhJ
FZspd18uaXn58zVm9qCk\nyfVODK2FOgIAtKoh3YBuZlMlHSjp3vKh2Wb2gJldbWaJm+OZ2SwzW2hmC73EPnTIXkfrPLxyLwAA\njVZ1M2VmYyTdIOkMd++VdIWkvSVN18AZh4uTxrn7HHef4e4zrKOrBimjyGpRR6OtvWH5AgBQSVXN\nlJl1auAF8Cfu/gtJcvced+9z935JV0o6uH5pohVQRwCAVlTNbD6TdJWkB939m4OOTyrfByNJx0ta\nnDWZei1/0D4ifkas0iyXvM0aKKJa1tHyUeP1H9PfH4z/7wrLH0TzLNDskeHGzLok3SFpBw387vpv\ndz/XzPaSdJ2knSXdJ+kUd9/cvEyrl6benn/g9prnMWJM4tV1SdKOe7wh8fjKJXcHx+R5xl4t62jk\n+J20/3EfS4zd/ak9guNGH/uNdMkH7H7wMcHYo2fsknh85EnfDY6JvWbGXi/bOsKbIK/4X8k/j/Hn\n/TU4pkiqOTP1DkmnSDrMzBaV/xwt6Rtm9jcze0DSuyWFt0YHqCNkt0nSYe5+gAYuCx9lZodI+roG\nZoTuI2m1pNOamCPyjzpCzVUzm+8uSZYQYi0gVI06Qlbu7pLWlr/sLP9xSYdJOql8fJ6k8zRwLx7w\nKtQR6oHbA0rJAAAFgElEQVTtZAAUhpm1m9kiSSskzZf0uKSX3L1U/palYskNVEAdodZopgAURnmy\nwnRJu2tgssLrqx3LMi3YqlZ1VFr/ct1yRLHQTAEoHHd/SdLvJP2TpPFmtvWWhd0lLQuMYZkWbCNr\nHXWM2rFBmSLvaKYAFIKZ7WJm48ufj5R0hKQHNfBi+OHyt50q6abmZIgioI5QD9XszQcAeTBJ0jwz\na9fAG8Hr3f0WM1si6Tozu0DSXzSwBEch5GW5jc1rVwdjsSUQCqpmdTR5p5G64MQDEmM7fviCIScW\nWypj7KR9grGlf/plMDb64+HlCkLSbordX9oSjLXKEgghDW2mfMOqF7Ys+uHTgw5NlPRCvZ83/M8b\n1JC8UshjXtvntGe9n7B/bc8L6+765tY6yuPPRCKvoapYR+7+gAa2Idr++BNisVdUiTpCPTS2mXLf\nZvUwM1vo7jMamUM1yKt6zchpcB3l8WcikddQ5TUvAKgG90wBAABkQDMFAACQQbObqTlNfv4Q8qpe\ns3Nq9vOHkNfQ5DUvAKioqc2Uu+fyFyh5Va/ZOTX7+UPIa2jymhcAVMMGtikCgOHDzFZKytuMUPLY\nVtY89tx+0lOtUUe5zkGqTR5V1RHNFIBhLS8zCckjn3lUKy/55iGPPOTQ6DyacpnPzI4ys4fN7DEz\nO7sZOSQxs6fM7G9mtsjMFjYxj6vNbIWZLR50bCczm29mj5Y/TshJXueZ2bLyz2yRmR3dwHyoo3ge\n1BEANEDDm6nyqrOXS3qfpP0lnWhm+zc6j4h3u/v0JnfVcyUdtd2xsyUtcPd9JS0of91oc/XqvCTp\nkvLPbLq739qIRKijqswVdQQAddeMM1MHS3rM3Z9w982SrpN0XBPyyC13v0PSi9sdPk7SvPLn8yR9\noKFJKZhXs1BHFVBHVcvLze/ksa285FGtvOSbhzzykIPUwDya0UxNlvTsoK+Xlo/lgUv6tZndZ2az\nmp3MdrrdfXn58+cldTczme3MNrMHypdvGnXZiDpKhzraTl5mEpLHtvKSR7Xykm8e8shDDlJj82j2\nOlN58053P0gDl45ON7NDm51QEh+YNZCXmQNXSNpb0nRJyyVd3Nx0coE6GjrqCEBhNaOZWiZpyqCv\ndy8fazp3X1b+uELSjcrXppc9ZjZJksofVzQ5H0mSu/e4e5+790u6Uo37mVFH6VBHg+RlEkOzJi3k\nYZJCK0xIyEMdDecaiuTRsDpqRjP1Z0n7mtleZjZC0sck3dyEPLZhZqPNbOzWzyUdKWlxfFRD3Szp\n1PLnp0q6qYm5vGLrC3PZ8Wrcz4w6Soc6+sdz5m0SQzMmLcxV8ycpJOUgFWRCQs7qaLjWUCgPqUF1\n1FGvBw5x95KZzZb0K0ntkq529783Oo8E3ZJuNDNp4OfyU3e/rRmJmNm1kmZKmmhmSyWdK+lrkq43\ns9M0sEjcCTnJa6aZTdfA5aKnJH26EblQR5VRRxW9MomhnNfWSQxLGvDcueDud5jZ1O0OH6eBfx9p\nYJLC7ZK+3OAcimRY11EeaiiSR8M0vJmSpHJ3mKt3GuX/CAc0Ow9JcvcTA6HDG5rIdgJ5XdXwRMqo\nozjqqKKkSQxva0Ie0j8mLbik7zf5Bt68TFKYbWYfl7RQ0pnuvrpJeVSSlzqihpI1pI64AR0Ami+X\nkxaaOEmBCQlDRw29WsPqiGYKwHCVm0kMOZu00PRJCk2c2JJGLuqIGnq1RtYRzRSA4SoXkxhyOGmh\n6ZMUmjixJY2m1xE1lKyRddSUe6YAoNlyNImhaZMW8jBJIUcTElLJSR0N6xqK5NGwOrKBy5kAAABI\ng8t8AAAAGdBMAQAAZEAzBQAAkAHNFAAAQAY0UwAAABnQTAEAAGRAMwUAAJABzRQAAEAG/x8B11AN\niLc7OgAAAABJRU5ErkJggg==\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "l_cc_phm = phm_list[2][2]['L']['CC'][0]\n", - "l_mlo_phm = phm_list[2][2]['L']['MLO'][0]\n", - "fig,ax = subplots(1, 4)\n", - "fig.set_size_inches([10, 8])\n", - "ax[0].imshow(l_cc_phm[:,:,1], cmap='RdBu_r', vmin=0, vmax=1)\n", - "ax[1].imshow(l_cc_phm[:,:,2], cmap='RdBu_r', vmin=0, vmax=1)\n", - "ax[2].imshow(l_mlo_phm[:,:,1], cmap='RdBu_r', vmin=0, vmax=1)\n", - "ax[3].imshow(l_mlo_phm[:,:,2], cmap='RdBu_r', vmin=0, vmax=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "collapsed": false, - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": 
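The same four-panel imshow pattern recurs below for the right side and again for the second case. Purely as an editorial sketch (not part of the deleted notebook), a small helper could render one side's CC/MLO heatmaps in one call; it assumes the (subjectId, examIndex, {side: {view: [heatmap, ...]}}) layout that phm_list follows in these cells, and that channels 1 and 2 of each heatmap hold the two foreground-class probabilities.

```python
# Editorial sketch only -- not part of the original file. Assumes the phm_list
# layout used in the cells above: (subjectId, examIndex, {side: {view: [phm, ...]}}),
# with heatmap channels 1 and 2 holding the class probabilities.
import matplotlib.pyplot as plt

def show_side_heatmaps(case_dict, side):
    cc_phm = case_dict[side]['CC'][0]
    mlo_phm = case_dict[side]['MLO'][0]
    fig, ax = plt.subplots(1, 4)
    fig.set_size_inches([10, 8])
    panels = [(cc_phm, 1), (cc_phm, 2), (mlo_phm, 1), (mlo_phm, 2)]
    for axis, (phm, ch) in zip(ax, panels):
        axis.imshow(phm[:, :, ch], cmap='RdBu_r', vmin=0, vmax=1)
    return fig

# Usage mirroring the notebook cells:
# show_side_heatmaps(phm_list[2][2], 'L')
# show_side_heatmaps(phm_list[2][2], 'R')
```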
"iVBORw0KGgoAAAANSUhEUgAAAlMAAAEHCAYAAACKtsOUAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XucXXV19/HvmlsmYTKQGyEGHoLIpSnVSFNUEA0gFy0a\nsJUHanmopQRfgBalIsU+greKVIhoEQwmBCoKPHJti6BGYrgoEtMQwtVAQ0PI/UIyuU1mZj1/zAGT\neH6/c2bvc9n7zOf9euWVmb3Ob+81Mys5v9mX9TN3FwAAAJJpqncCAAAAecZkCgAAIAUmUwAAACkw\nmQIAAEiByRQAAEAKTKYAAABSYDIFAACQApMpAACAFJhMAQAApNBS7wQAIC0zO0XSdZKaJX3f3a+K\nvr6l3a2toya55ZJZMNTU3BqM9fX2RHYaWW2jxEoc1hx+q2puHVJ0e8+Wderb0RX+QoodJw91ZOFz\nIKP2Gx2MrVu5JrbTyOHi51zc+yLBcKgp8jNt7xwejG17/fVE+5SkIR17BWMTRg0run35sv/RhvXr\nStYRkykAuWZmzZKul3SipFclPWlm97v7s8ExbR1qOewjtUoxd5pawhOm9hH7BWM7Xg+/Yff1dAdj\n3hd5Q5Y0pDM8SRj+loOLbl/70D9H97mnvNRRc1t7MHb6Z88Lxm65+vpgrKmlLRhraY9PFnu7twVj\n3tcbjA3Ze0ww9scnvD8Ye+o//jMYGxqpTUk65D1HBmMzP/FnRbd/7ORwLrviMh+AvDtK0hJ3f9nd\nuyXdLmlqnXNC/lBHSIzJFIC8Gy9p2S6fv1rYBgwEdYTEuMwHYFAws2mSpkmSWsP3TgAx1BGK4cwU\ngLxbLumAXT7fv7BtN+4+w90nu/tkawnfd4JBizpCYkymAOTdk5IOMbODzKxN0pmS7q9zTsgf6giJ\ncZkPQK65e4+ZXSTpIfU/0j7L3Z+pc1p1t/9Rfx6N3981Kxg78tmxwdjWNcuCsWrZsWltMDbu7UcX\n3b4+8kRiMXmpo9gTa7Ov+k6ifX7iNw8EY6ceuHd07L9cHD7mY7fMDsZidfTk7T+IHjOkZWj8ycP5\nd94WjL3/iaeKbt/wSrj2djt2Wa+qEGtpdxsS7h9RqtcIyjdyv32j8fUrV1fluN7dJe/ZPqDeLgNV\nso5iqLEBOfit8ftvt+2MP9Le2hwuhRVrtwZjOzevVu+2TWXXkbs/ICn8jgCUgTpCUqkmUwNucDZk\nuFoPDz9pWqrXCMr3kUsvisZ/cPW/VuW4PS8M/Kx4pevImpqDsb6enQPObzCbflu8V89za7qi8f2G\nF2+oKElfnjk/GFv2/z4XTwwAMiTxPVO7NDj7oKSJks4ys4mVSgyDA3UEAMi7NDeg0+AMlUAdAQBy\nLc1kqqwGZ2Y2zczmm9l87wm3ncegRR0BAHKt6q0Rdu/JMbTah0ODoo4AAFmV5gb0shqcASVQR0AV\nvPqb8IKwkvSBt30wEl1Q2WRKeNtxp0XjSx6+NxhrDjwxalV9prh+ulYtrfg+7/hgeIFk/eSm6Niz\nvjQtGHtsQvG2FWnEFuG+Z82D0bEn64+DsSlT31t0+88Xxr/+N/Mq61XF0eAMlUAdAQByLfGZqUQN\nztxpf1BB1hSeC+/bGX4kvdRYqXZtKpLU0ZEHj9ETd10YjG97NDwXGzU93nCwEeuzpT2+ftgPXv5F\nMHbS6O3Rsb3Hx58VuOzMcGuFFYseDcZ2bou3XACALEnVZ4oGZ6gE6ggAkGeszQcAAJACkykAAIAU\nmEwBAACkUNOFjgEgC9r3GanDTz2jaGz5My8Ex2185enofmMPMMQe+qjHgw/rl9S2/UFMrPVBKXde\nOqXo9jMfTrgY+gC0DO3QmInFH/8/+6/eExz37a/eGN1vz/YtwVisjUTS7+PRU48Pxq798cXRsS1r\nXw7GLpo6PVE+MbH1VT/S+f744E1rg6F/PPHQotsXT28vKy/OTAEAAKTAmakci/02+6MfL4yOHTrq\nD1Zs2c3WNfEWAvW0YMlqDT39O8F467DOYCyrrQ9Ktaoo9fN65o7PBWOzx0+Kjn14e08w9qsn4i0K\nrhv3rmjcHvn3YGzUoX8WjHU/E2/tAQBZwpkpAACAFJhMAQAApMBkCgAAIAUmUwAAACkwmQIAAEiB\np/kADDo7t27VsqeK94x6fdlzwXGlnrqM+aOTPhqMPfvgjxPvtxE0tbRG47HeQp+5u/jPcdmGbaly\nKkfH8KE65gNvLxp76PH/CY7r7U6e2w2Pfj0YO1F/lGif83/xX8FY5403RccOHfWWYCxrvdWGdI4O\nxo7+i8uLbt/x0vKy9s2ZKQAAgBRqe2bKLPFvdlntDxSb6UrSjkjH1WravOKlaHzbuvJm25lUoo52\nbt1Uw2Qq465/+2o0/rFPfDkaP+j4T4eDh70vSUqSpDM2bk88Vor/u137/BPBWKwDNABkDWemAAAA\nUmAyBQAAkAKTKQAAgBSYTAEAAKRAawQADcHMlkraLKlXUo+7Tw69tnfnDm1a/sKAj5HmQZj5Z4b/\nux32YOLdJrb6wnHB2L7Xr6hhJvHWB1L8Eft5M2cV3d6zduAP/wykhiRp4+o1uvfbNw78OClabJz/\n7svCwUfuS7TPdS/OD8a+cPXnE+1Tkj7+jnCNTTz54sT7DanKA19e3r95JlMAGslx7l6fR2jRKKgh\nDFhtJ1PumW1xkFS9Wh+U0rVqab1TqJ6c1lFzW3sw9tcX3xAd29udrkVBUnde+926HBcA8oR7pgA0\nCpf0UzP7rZlNq3cyyCVqCIlwmQ9Ao3ivuy83s30l/czMnnf3eW8EC2+O/W+QbXvVKUVkXLSGpD3q\nqJU6Qj/OTAFoCO6+vPD3akn3SDpqj/gMd5/s7pOtZWg9UkTGlaqhQmyXOgpfusfgwmQKQO6Z2V5m\nNvyNjyWdJGlxfbNCnlBDSIPLfAAawVhJ95iZ1P//2g/dPdxwoA4PMQz7m9trerxSYu0PDj/pL4Kx\n5396VzXSiarRz2pgNZRCmq/n5YTtD2JirRq+duk3Eu/3G+2D5zIokykAuefuL0t6R73zQH5RQ0iD\ny3wAAAAppDozNdBusRiYezf8Nho/bcSf1iiT6mqUOmopcUq7t3tbMLZ1zbJKp/OmzvGHRuOblr9Y\ntWMDwGBQict8dItFJVBHAIBc4jIfAABACmknU3SLRSVQRwCA3Ep7mY9usagE6giDWuzR9HqsQ/ni\nL+4PxppaWoOxvp6d1UgHZdoy47RgbPgF/xmMxX5usZ93qbF9Pd3BWOuwzmBs59ZN0WNmUaozU3SL\nRSVQRwCAPEs8maJbLCqBOgIA5F2ay3w16xaLhkYdAQByLfFkKmvdYmP3HLSP2C86dtu61xIfd0jn\n6Gg81ltIknq2bwnGGqWPVEye6uiPTvpodOxzP707Go/d+3Lapz8ZHXvAef87Gt/S68HYDzpGRMfW\ny4cuCD9rMPdrv6phJgCQDq0RAAAAUmAyBQAAkAILHQNA
ndWj/UHsknbscfe3Hjs1GHv5kftS5YR0\nxn3p2WAsaduKlvaOaLy7a0OiY9638clg7MMdk0onlkDstpwdm9ItwMGZKQAAgBSYTAEAAKTAZAoA\nACCFhrlnKnbPwfYNK6NjN18zJRoffsncYCztdVbkR5rWB1J8WYZ7v31jdOzM38yLxs87/6vBWF/k\nnoZSeVVzeZAHvjsjGOtZvaZqxwWASuPMFAAAQApMpgAAAFJomMt8AJBln/3qJcHYtf90TcWPF7t8\nKyW/hBtrfzB01FuCsTQrTeD3YisH/OKHlW9NMWzU+Gg81hohplrtD2KqeVsOZ6YAAABSYDIFAACQ\nApMpAACAFJhMAQAApJCbG9BL3UxpTc3B2L3rfx0d+8HF55Q4+twSceTFyLcdGY2vX7IgGBt9+Lui\nY7esWRaNp7kB99xzr0g8tpSxR7wvGFu56OHo2LaOkdE4fdgADAacmQIAAEiByRQAAEAKubnMBwBm\nNkvSqZJWu/sRhW0jJd0haYKkpZLOcPdkzW+qaOb3HgjG9jnwiGDs2499Pxi74E/PDMa6Vi0tK6+B\n6nroi8FYx8lfrsoxKynPNSRJdx8Xjh0479BgrLtrfTAW6zm28ZXFZeWVBa3DOqPxnVs3Ve3YnJkC\nkCezJZ2yx7bLJM1x90MkzSl8DoTMFjWECmMyBSA33H2epD1/xZ4q6ZbCx7dIOq2mSSFXqCFUA5f5\nAOTdWHdfUfh4paSxxV5kZtMk9a/F0bpXbTJDXpRVQxJ1hOIyNZkadejkYGzdi/NLjA5f8/1IZ/xx\neL/55hL7zp+W9vg/8p7tW2qUSe0NG3NAMLZx6dOJ97v2+ScSj5UkawqfCPa+vlT7HtI5Ohhr33tM\ndOyKhXOCsU89HW+N8J0/idzAUQfu7mbmgdgMSTMkqWnY6KKvAWI1VIhTR/gDXOYDkHerzGycJBX+\nXl3nfJA/1BBSYTIFIO/ul/RG591zJN1Xx1yQT9QQUsnUZT4AiDGzH0maImm0mb0q6QpJV0m608zO\nlfSKpDPqll/kUu7ry54LxjZ94+hgrPOVd4cPmKL9QdLLztVofxDLRUp/GXy3Y2W8hkrZ6+O3BmMf\n+Nuzg7GHFj9SjXSqevvCQJ3z+L3R+KzJJwdjsfYQ5WAyBSA33P2sQOiEmiaC3KKGUA1c5gMAAEiB\nyRQAAEAKTKYAAABSKHnPVC3XMdrw8lNpd1FUqZvg2jpGROPdXZlcoilq34nHROOvLfhpjTLpV9E6\nMit5w2rIg33xGju5KbxG2tBR46Njj/2LPVeo2N1DN94UjaexY9PaRLFSstZHCgCyqJx3pNliHSOk\nN1vUEQCgAZU8M+Xu88xswh6bp6r/0VKpfx2juZI+X8G80GCoIwwGsbOXW9csC8Y6P/94NdKJqvVj\n6zFNLW3ReG/39hplkg0jDnpHMLbhv8Nn16t59jskS3W0d2v8/FDa9gcxSe+ZKnsdIyCCOgIA5F7q\nPlOl1jFiUUiUY0B11EYdAQCyI+mZqbLXMXL3Ge4+2d0nW0t7wsOhQSWso6E1SxAAgFKSTqZYxwiV\nQB0BAHKv5GSqsI7RryQdZmavFtYuukrSiWb2O0kfKHwOBFFHAIBGVc7TfBVbx8iamtU6rDMY37l1\n00B3WRF57CNVSq37SJVS6fWwrKk5GIs9NfXnwyKLxkrySA3G9itV90maWi4ECwAYGBY6BoAyNbfF\n7/ssNeEGpNK/HMXaH8SMOnRyMLbuxfmJ9pknvcFHmKqP5WQAAABSYDIFAACQApMpAACAFJhMAQAA\npMBkCgAAIIWaPs3n7urr6a7lIdGARu03Rqf/wyeD8dlXfScYGzpiv+i+q9meI7Z46cZXno6OpfUB\nAGQXrREAoEzV+mWwY+yEYKxr1dJgrKU9+TqVPdu3JB6bRKwdQG/39hpmUn+tw/aOxmO9D2Pfx4Uz\nzgnGDvvYqmCsd8e2YKxne1cwJkltw0cGY9vWvRYdm8TbjjstGPtWxY9WPi7zAQAApMBkCgAAIAUm\nUwAAACkwmQIAAEiByRQAAEAKTKYAAABSqG1rBO8bdI/AplFqZfHB2nto3cq1+rdrb0o01vt6K5xN\n+bZtWBmMVfNn2fWdU6Lxjk89WLVjV5KZzZJ0qqTV7n5EYduVks6TtKbwssvd/YFq5VCtn9Mfn3Bs\nMPbMnPC4WNuErMnK/1dZqKNS7QZiYt/HjZ87Oxg771NfDsau++K1ifOpRvuDmCUP31vT45WLM1MA\n8mK2pGIzw+nuPqnwp2pvgGgYs0UdocKYTAHIBXefJ2l9vfNAvlFHqAYmUwDy7iIzW2Rms8xsRL2T\nQW5RR0iMyRSAPLtB0sGSJklaIema0AvNbJqZzTez+d7DvZvYDXWEVJhMAcgtd1/l7r3u3ifpJklH\nRV47w90nu/tka2mvXZLIPOoIaTGZApBbZjZul09Pl7S4Xrkgv6gjpFXT1gjW1KzWYZ3B+M6tm2qY\nze9ltQVBNY/b1NIajff17KzasdOadNj+emxu8Cy89nnvp4Kx7i2vR/cd+76U+p5seOz6aHzkseG8\nqvnzqGbrg396fl40/vW3nxSMDbRNipn9SNIUSaPN7FVJV0iaYmaTJLmkpZLOH9BOi2hp3ysY69m+\nJTp2zl1XB2Nn/OOdwdh/3f8fwVh314boMfMizb+tSqpVHVXr633rsVODsePWhNuvrE3R/iCm1j/X\nNP8+q6m2faYAICF3P6vI5pk1TwS5Rh2hGrjMBwAAkAKTKQAAgBSYTAEAAKTAZAoAACAFJlMAAAAp\nMJkCAABIoWRrBDObJelUSavd/YjCtislnSdpTeFll5ezyra7q6+nO3m2VVKvPlL1dPNN/zcaf3FN\nuF/H1y79xoCPV8k6embZ65r46XBfntjPs7d7W3Tfsb4opXpBjTjmwmg8xpqao/EhnaOj8R2b1iY+\ndhqX3ndZNP6FAw4Nxob+1ewKZ1N/vR6OffLvTgjGvnbp/Cpkky1Z7l1XDe895+xg7OWnV0XHrlw0\nNxj79dt/E4zte/2KknkV09wW7uReqh9cW8fIYGzHpjXBWNL33Xr2koop58zUbEmnFNk+3d0nFf6U\nfAPEoDdb1BEAoAGVnEy5+zxJ62uQCxoYdQQAaFRp7pm6yMwWmdksMxtRsYww2FBHAIBcSzqZukHS\nwZImSVohKbhQmplNM7P5Zjbfe+L3q2DQSVRHvdvrs4YjAADFJJpMufsqd+919z5JN0k6KvLaGe4+\n2d0nW8vQpHmiASWto+b28GLZAADUWqLJlJmN2+XT0yUtrkw6GEyoIwBAIyinNcKPJE2RNNrMXpV0\nhaQpZjZJkktaKun8KuaIBkAdIS/StG856S8vrWAm1TVz5peCsXPPvaKGmTSmZx5ZFIytezF5K4x9\nrw+3BnjbcacFY0sevjcYK9X+IKZ1aEcwtn1jvAVESKwNjff1BmMt7eFcJGnn1urdIlJyMuXuZxXZ\nPDPZ4bLZZyo
Na4qf3KtmD6uW9r2CsaaWtujYcz7xxcTHbR0WvszWG+iXVMk62rlts1YtnheMx34m\nv3vg69F9f2XkEcHYrEnHl04uoZenxuvo8DnxPlRpXLx4bjB2zMLHomM7//oLJfb+eDBy2IkfDcaW\nrHikxH4BIDvogA4AAJACkykAAIAUmEwBAACkwGQKAAAgBSZTAAAAKZR8mg8AGo01NQefSq3m49NZ\n8rmv31fvFN5Uz6eiUzELPsa/dd1rNU4m3v6gWo6eGn7K+aEbX0q0z76enYnGrZy2dzQ+6lt1bI2A\nuHr+I+/ZHu43Yk3VW7on9mYT6wFSMX190a+9Y+yEYOyg4z8d3fXfRmL/8tKj0bGfO/i90XjMhLt2\nlHhFsn4t5fj2pBODsRsumF614y755QPB2I7Nr1ftuABQaVzmAwAASIHJFAAAQApMpgAAAFJgMgUA\nAJACkykAAIAUeJoPQC6Y2QGSbpU0VpJLmuHu15nZSEl3SJogaamkM9x9Q2xf7n3q2d418Bzy+gh/\nEeuXLKh3Cm+q5fetknUkmSywuPvOLeEnUt93buy5Yemxf/thMNbbvT2eUsAhJ5wejL38yE8SH++V\nl9cnyqcaRn1rWd2OzZkpAHnRI+kSd58o6d2SLjSziZIukzTH3Q+RNKfwORBCHaHiantmyr0uv7mN\nPvxd0fja55+oUSa1k6ffkCuta9XSxGOnjB4WjI0Zt1fi/Vbbzct+FYx94oD3RMfGGuTt2LQ2cU6l\nRH/j9T+sX3dfIWlF4ePNZvacpPGSpkqaUnjZLZLmSvp8JXNF46COUA2cmQKQO2Y2QdI7JT0haWzh\nDVKSVqr/8g1QEnWESmEyBSBXzKxD0l2SLnb33drxu7ur/z6YYuOmmdl8M5vvPdVbIQD5QB2hkphM\nAcgNM2tV/xvgbe5+d2HzKjMbV4iPk7S62Fh3n+Huk919srUMrU3CyCTqCJXGZApALpiZSZop6Tl3\nv3aX0P2Szil8fI6k7Kzgi8yhjlANtEYAkBfHSDpb0tNmtrCw7XJJV0m608zOlfSKpDOqlcBgfrCj\nHG0dI4Kx7q4SXQZqp2J11LZXpw446qSisaWP3h8c9/htd0T3m7T9Qczv5txT8X1K0ub1Wyu+z6cf\n/FYw9ienXFzx41UCkykAueDuj0qyQPiEWuaC/KKOUA1c5gMAAEghN2emmlpao/FYr5y0faRix44d\nt9piXXTnzZxVw0zyo1Qd/Z/9312jTCrr7999TjD2jz+/KTr2zxY8Fox99OOXJ84JAAYLzkwBAACk\nwGQKAAAgBSZTAAAAKeTmnikAqJg6rRPaCFraw2tUZqj9QU10d22MtkAI6dm+JRqfdPqZwdjCe24f\n8PGqafn8nyQa1zF2QjCW1fYHMZyZAgAASIHJFAAAQAolL/OZ2QGSblX/CtouaYa7X2dmIyXdIWmC\npKWSznD3qp3jrWcLgmoee0jn6GBsx6a10bHVbH+wefrxwVjnJXPDA614L7zBUEfWFP/dpJqXlfp6\nuoOx7334M9GxP7z+V5VO502x70lzW3hds54S30sAyJJy/sfqkXSJu0+U9G5JF5rZREmXSZrj7odI\nmlP4HAihjgAADankZMrdV7j7gsLHmyU9J2m8pKmSbim87BZJp1UrSeQfdQQAaFQDOpduZhMkvVPS\nE5LGuvuKQmil+i/fACVRRwCARlL2ZMrMOiTdJelid9+0a8zdXf33wRQbN83M5pvZfO+p/ErYyBfq\nCADQaMrqM2Vmrep/A7zN3e8ubF5lZuPcfYWZjZO0uthYd58haYYkNQ0bXfSNEoMDdYTMaGoK9ksq\n1QNosOvt3lbvFDKjZWiHxkw8umhszbOPB8eVWiP08anh2LB7ykot8yZ96Lhg7NGbb65hJpVR8syU\nmZmkmZKec/drdwndL+mN1VXPkXRf5dNDo6COAACNqpwzU8dIOlvS02a2sLDtcklXSbrTzM6V9Iqk\nM6qTIhoEdQQAaEglJ1Pu/qik4s2DpBMqm051DB31lmh827rXapTJHyrVS6pehn/mF8kGevErcI1Q\nRyPfdmQ0vn7Jghpl8oe6Vi0NB2MxVTdva2oOxqKX01jqBUCO0BkPAAAgBSZTAAAAKTCZAgAASKGs\n1ggA0FDcecQ/IramYjXXmMybliFtGvvW/QPR4i0TJGnj0sXR/Q77m9sT5fNA96Jg7ENtb0+0z7aO\nEdH4rS8+GIyt2tETjP19/rofRHFmCgAAIAUmUwAAACkMist8pVofNLe1R+O93dVbvqRz/KHB2Kbl\nL1btuGm85ciTgrEVy+bWLpEaK9VCYNiYA6LxbeuWB2Pf+94Xo2OnnXdlNJ7VyzJ9PTuDsQOP/nAw\ntmz5vGqkAwBVwZkpAACAFJhMAQAApMBkCgAAIIVBcc8UgPwzswMk3SpprCSXNMPdrzOzKyWdJ2lN\n4aWXu/sDJfcXWOpmSOeY4JjtG1cNLOmcauT2B5Wso44Vr+iYr32yaOyaR78ZHnfhpmiO+719SjC2\nctHcYCxp+4MxE8NtHNY+/+vo2DPf8q5Ex4wZcdA7grEN//1UxY9XCUymAORFj6RL3H2BmQ2X9Fsz\n+1khNt3dw+9ewO9RR6g4JlMAcsHdV0haUfh4s5k9J2l8fbNC3lBHqAbumQKQO2Y2QdI7JT1R2HSR\nmS0ys1lmVrRls5lNM7P5Zjbfe6rX7gT5kbaOtqm3Rpki6zgzpdJ9pFra9wrGerZvSXXsrPaSilmx\n8OfB2M5tm2uYSbZsXbMsGh/SOToYO//8L6c6dlbvcWlqaQ3GXnn834Oxnq6NwZiZdUi6S9LF7r7J\nzG6Q9BX13//yFUnXSPrbPce5+wxJMySpaa8xXtYXgIZViTra14ZQR5DEmSkAOWJmrep/A7zN3e+W\nJHdf5e697t4n6SZJR9UzR2QfdYRKYzIFIBfMzCTNlPScu1+7y/Zxu7zsdEnxVWQxqFFHqAYu8wHI\ni2MknS3paTNbWNh2uaSzzGyS+i/PLJV0fqkd7TVylCZ//ONFYyuXbgiOe+mX9w0sY7wptmzXl6/+\nbHTsFy7+50qmUrE6aj30EO333buLxvb+zLcSJ7i1xBJolbbm2cdrerxSkrY/2PTI9Gi889jPJNpv\nOZhMAcgFd39UkhUJlewpBbyBOkI1cJkPAAAgBSZTAAAAKTCZAgAASIF7psoQ6yV14NEfjo6N9dIp\npWPshGi8a9XSxPtOI9rTyGm7ErJj09pgrK2jaH/AN3V3hW+KliRrCv9edMLf/UGrnN38fMb3o/E0\n+np2Vm3fAJAVnJkCAABIgTNTAAadLevW6rFbZheNtQ7rrG0yg4Q1NQdj1986v4aZVM6KZSv1pX+4\nqmgsdrY4tjKAJP3PJ8JvzfvfNCEYi12tiB3z9X85NhjrvGRuMCZVZ/WFWBuN25b+Mhgbf8a/VjyX\ncnFmCgAAIAUmUwAAACkwmQIAAEiByRQAAEAKJW9AN7MDJN0qaaz61yya
4e7XmdmVks6TtKbw0svd\nfdC14y/V+qDUjYaxR8fTtD6I3fwoSS3tHdH4zq2bku3biq3SQB2Vkqb1QX88fHPvY3fFv52xfY94\n66To2KW3nhuNj5hySTQe0mP8ngcgP8p5mq9H0iXuvsDMhkv6rZn9rBCb7u7frF56aCDUEQCgIZWc\nTLn7CkkrCh9vNrPnJI2vdmJoLNQR8iJ2VnZI5+jo2Fhj1qRiZw6r8Vh6tfR2bwvGXlvw0+jY2Bn+\nrDaGjf1s9p14THTsPl99JBgr1cw5JHY1YvhnfpFon6Ukrd3e7u3B2MWnfD4Y27xobll5VcOAzqWb\n2QRJ75T0RGHTRWa2yMxmmVm8hTNQQB0BABpJ2ZMpM+uQdJeki919k6QbJB0saZL6zzhcExg3zczm\nm9l87wnPNjE4UEcAgEZT1mTKzFrV/wZ4m7vfLUnuvsrde929T9JNko4qNtbdZ7j7ZHefbC3hrqZo\nfNQRAKARlZxMmZlJminpOXe/dpft43Z52emSFlc+PTQK6ggA0KjKeZrvGElnS3razBYWtl0u6Swz\nm6T+x9yXSjq/KhmiUVBHAICGVM7TfI9KKtY8aND1AkqiXk+alHrKJ/bEkiTtc+ARwVjnuAnB2KvL\n5hbPhzrw9xuPAAAHT0lEQVRKpdTPMxbftu61xMddv2RBND7qA0ui8WtfmBOMTTn9sGDsYy91xxMD\ngAyhMx4AAEAK5VzmAwCodB+pavSEqkcvqdjXceoF5wVjixaEz4KWWi0iJqu9pJJatTjcR0qK99Xa\n/vqaYKy5LfxwTqzPV8yHLpgWjZ966d8EY0+9Hn7y+oqNzwRje3ctD8Y6Pnx1NJ964cwUAABACkym\nAAAAUmAyBQAAkAKTKQAAgBS4AX2Qit1gKkkj/9fBwdio8cODsZVtzYlzQv6UarEx8dCRwVjr8GHB\nmDXzex6A/OB/LAC5YGbtZvYbM3vKzJ4xsy8Vth9kZk+Y2RIzu8PM2uqdK7KLOkI1cGYKQF7skHS8\nu3cV1nl81Mx+Iumzkqa7++1mdqOkc9W/gHbNxdoYxB53b2oJv2/3bN+SKqckYl/Hv//r94KxUme8\nMyLzdRRrBRGLxVsjJFsg/oHvzojHJxydaL/fe/+nEo2LiTWblqRNy18IxtpH7Fd0e9dL5c2pc1H5\nAOD9ugqfthb+uKTjJf24sP0WSafVIT3kBHWEamAyBSA3zKy5sLbjakk/k/SSpI3u3lN4yauSxtcr\nP+QDdYRKYzIFIDfcvdfdJ0naX9JRkg4vd6yZTTOz+WY233uSXfJAY6COUGlMpgDkjrtvlPSwpPdI\n2sfM3rj/c39JRdeicPcZ7j7Z3SdbS/jeEgwe1BEqhckUgFwwszFmtk/h46GSTpT0nPrfDP+y8LJz\nJN1XnwyRB9QRqqGmT/P5tnVrdy68+ZVdNo2WFF85tD4GfV4vLJhZ7kv3zOnAymezuz3qaND/rAao\npnlNiQWf/PWun5VTR+Mk3WJmzer/RfBOd/8PM3tW0u1m9lVJ/yWp7OLFoEQdoeJqO5lyH7Pr52Y2\n390n1zKHcpBX+eqR0651lMXviUReA1VOXu6+SNI7i2x/Wf33vWRa0sfd8yTWUqFaQu0Aeqz4hZe8\n11FM0vYH1dI6rDMY877eYOy393wtGPvLb/4yGHvlibnRfPaffHIwdsFfvaPo9unn3R3d5xu4zAcA\nAJACkykAAIAU6j2ZirdWrR/yKl+9c6r38UPIa2CymhcAlFTXyZS7Z/I/UPIqX71zqvfxQ8hrYLKa\nFwCUo95npgAAAHKtLpMpMzvFzF4orM59WT1yKMbMlprZ02a20Mzm1zGPWWa22swW77JtpJn9zMx+\nV/h7REbyutLMlhe+ZwvN7EM1zIc6iudBHQFADZi71/aA/b09XlR/o7RXJT0p6Sx3f7amiRRhZksl\nTXb3uvYHMrP3SeqSdKu7H1HYdrWk9e5+VWHiMMLdP5+BvK6U1OXu36xxLtRR6Tyoo3AOayRltedd\nlnKR8pvPgXu246m0Peoor9+nWslSPgPJpaw6qmmfqYKjJC0p9PSQmd0uaaqkur8JZoW7zzOzCXts\nnqrf90C8RdJcSTV9EwzkVS/UUQnUUTSHzPa8y1IuEvnEZLnnHfmEVSOXelzmGy9p2S6fZ2l1bpf0\nUzP7rZlNq3cyexjr7isKH6+UNLaeyezhIjNbVLh8U6vLRtRRMtQRAFQYN6Dv7r3ufqSkD0q6sHA5\nInO8/9psba/Pht0g6WBJkyStkHRNfdPJBOpo4KgjALlVj8nUckkH7PJ5cHXuWnP35YW/V0u6R9la\nWmCVmY2TpMLfq+ucjyTJ3Ve5e6+790m6SbX7nlFHyVBHxWWpNUOWcpHIp1xZy4t8wiqeSz0mU09K\nOsTMDjKzNklnSrq/Dnnsxsz2MrPhb3ws6SRJi+Ojaup+9a9kLmVoRfM33pgLTlftvmfUUTLUURFZ\n6nOVpVwk8ilX1vIin7Bq5FLzG9DdvcfMLpL0kKRmSbPc/Zla51HEWEn3mJnU/335obs/WI9EzOxH\n6r9JeLSZvSrpCklXSbrTzM5V/9MjZ2QkrylmNkn9l4uWSjq/FrlQR6VRRwBQGzVvjQAAWWFmp0i6\nTv0T8u+7+1V1zmeppM2SeiX11PrpJzObJelUSat3aVsxUtIdkiaof6J7hrtvqFMuV0o6T9Kawssu\nd/cHqp1LKdTRbsfOTA1F8rlSFa4jbkAHMCgVepVdr/4HBSZKOsvMJtY3K0nSce4+qU6Pkc+WdMoe\n2y6TNMfdD5E0p/B5vXKRpOmF78+kjEykqKPdzVZ2aiiUj1ThOmIyBWCwerNXmbt3S3qjV9mg5e7z\nJK3fY/NU9fckU+Hv0+qYSxZRR7vIUg1F8qk4JlMABqss9irLYo+yrPUmy1o/MuqotKzVkFThOmIy\nBQDZkekeZRnoTUY/svJkto4yUENSFeqIyRSAwSpzvcoy2qMsM73JMtCPrBjqqLTM1JBUnTpiMgVg\nsMpUr7IM9yjLTG+yevcjC6COSstMDUnVqaN6LHQMAHWXwV5lde9RlqXeZHnpR0Yd7S5LNRTJp+J1\nRJ8pAACAFLjMBwAAkAKTKQAAgBSYTAEAAKTAZAoAACAFJlMAAAApMJkCAABIgckUAABACkymAAAA\nUvj/RXGVUM1hHvkAAAAASUVORK5CYII=\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "r_cc_phm = phm_list[2][2]['R']['CC'][0]\n", - "r_mlo_phm = phm_list[2][2]['R']['MLO'][0]\n", - "fig,ax = subplots(1, 4)\n", - "fig.set_size_inches([10, 8])\n", - 
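Subject 846 in the cells below is a case with a positive left-side label. As a hedged sketch (again not from the original file), one way such cases can be located is to scan phm_list for sides whose 'cancer' flag is 1; NaN labels, which get_feature_matrix_dm further below maps to 0, are skipped here.

```python
# Editorial sketch only. Collects (subject, exam, side) triples whose 'cancer'
# flag equals 1; NaN labels are treated as negatives, matching the
# "cancer = 0 if np.isnan(cancer) else cancer" handling in get_feature_matrix_dm.
import numpy as np

def positive_sides(phm_list):
    positives = []
    for subj_id, exam_idx, case_dict in phm_list:
        for side in ('L', 'R'):
            label = case_dict[side]['cancer']
            if not np.isnan(label) and int(label) == 1:
                positives.append((subj_id, exam_idx, side))
    return positives
```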
"ax[0].imshow(r_cc_phm[:,:,1], cmap='RdBu_r', vmin=0, vmax=1)\n", - "ax[1].imshow(r_cc_phm[:,:,2], cmap='RdBu_r', vmin=0, vmax=1)\n", - "ax[2].imshow(r_mlo_phm[:,:,1], cmap='RdBu_r', vmin=0, vmax=1)\n", - "ax[3].imshow(r_mlo_phm[:,:,2], cmap='RdBu_r', vmin=0, vmax=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Explore another case" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "subj = 846\n", - "exam = 1\n", - "L = 1\n", - "R = 0\n" - ] - } - ], - "source": [ - "print 'subj =', phm_list[9][0]\n", - "print 'exam =', phm_list[9][1]\n", - "print 'L =', phm_list[9][2]['L']['cancer']\n", - "print 'R =', phm_list[9][2]['R']['cancer']" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlMAAADzCAYAAACiwpMvAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmYVdWV9/HfqipKQGYLaUQcoqJBjKhEM5jEOWhM1Lcj\nmjmtLZrEtu34xtjpRNHYtjHtlNFGUTRtNMQhjlEJapyNJSGKMyqoiMxYMlkU7PePurwpYe1Tde85\nd/5+nsdHWPuee3ZVLap2nXPW2hZCEAAAAArTUO4JAAAAVDMWUwAAACmwmAIAAEiBxRQAAEAKLKYA\nAABSYDEFAACQAospAACAFFhMAQAApMBiCgAAIIWmNAeb2XhJl0tqlHRVCOHCxNc39Q7W3C/NKbNh\nFh+jI3wqoX2lQsfahE/w5vLNo/6DhoSWbbZ1xxYuWxM9rqOjw403NDZGj2lf+W7SVOLIo1QKySMA\nKJeCF1Nm1ijpl5IOlfSWpKfM7PYQwvPRY5r7qWnXLxR6ysw0NPWKjm3oWFfCmdSejpduz+v1heRR\nyzbb6pzr7nTHLvn9s9FzrXhnqRvvO3Bg9JjXH70jOmYN8UUYeZROvnkEAOWU5jbfvpLmhBBeCyG0\nS7pR0lHZTAt1hDwCAFS1NIupEZLe7PL3t3KxDzCziWbWamatoWNtitOhRuWdR+8tX1ayyQEA0J2i\nP4AeQpgcQhgXQhhnTb2LfTrUqK551H/wkHJPBwCA/y/NYmq+pJFd/r5tLgbkgzwCAFS1NIuppyTt\nYmY7mlmzpOMl8dQo8kUeAQCqWsHVfCGEDjM7VdK96ixpvzqE8FxmM+vCGvw1X9iwoaD3K6TSigrA\n4igkj+bNfVsTT5rkjq04e4/ocRf2PdyNrz3yiOgxx43/UHSsqXf8n8/QyTe58Q8fG+/60P5e/Fmw\n9e08bwgAlSpVn6kQwt2S7s5oLqhT5BEAoJrRAR0AACCFVFemAKASVO1uDJUstlNElXT3L8VuDJnl\nUeRRlj1b/B/Rf1vUnvcpmvsNcuPtK1e48cbmPm7cIjtGdKxZ6Z+4TvKIxRSAqlbNuzFUsqyfVS21\nUuzGkFUeNfXe0o3/eaK/AGr5Wf4Fz9vt789z7iP+52nQ9mPc+Bb9B7vxhbMfduP1kkfc5gNQ7eii\njyyQRyhYVVyZamhqduP7HXdc9JjHr/9N3u8nxaumqNirDoPOi+/N19T7NTf+xtL4MYf+5KHo2I2L\nr4yOHf3zx9z4zBu+Hz3muMgxkvTm7BejYyvmxedfLb/9peR10d9v0xeZ2URJEyVJvfwrAahr5BEK\nxpUpAHWB3RiQBfIInqq4MgUACSqui/5nTznJjd97RfxqZrFtMaDFjb/ftsSN18lVza7KlkcHf/PL\nbrx5RKzlXv7TOugXp7vxq8ce5MZXLnzdjb+3YI4br/Zno9LiyhSAakcXfWSBPELBuDIFoKqVcjcG\n1C7yCGmwmAJQ9eiijyyQRygUt/kAAABSqIorU7F2BY/95troMa/f/7Po2L1z4hvKfuuU89x4vTxE\nV8s61q5y49t99qzoMSvuiLcyWP+4/+CmJP1t0jQ3/uFbb4weU6jBO+4ZHetY63clfm/Bq5nPAwDq\nVVUspgCgmpSzai8mtrBG+V3xzjVufMAVHZmdI1a119zP72jevnK5G491a6/3Cw7c5gMAAEiBxRQA\nAEAKLKYAAABSYDEFAACQAospAACAFGq2mm+nw86Ijm3oWFeyefQdOjI69v67i6NjsXYQvQcNix6z\ndsXCnk+smlmDGpv9DUZjn7ckzf2HRMd2/ffHo2Ofveq/o2MhUjlz2a/Ojh5z+rf9thySNHr8F6Nj\nrz/1WHRszdK33fhHPn9c9Jhn7vhddAwfNGDEKDfeNv/lEs+ke2HD+nJPAREP/vxhNz7zsx9y4/u8\nODz6XvlW1cWq9n73m/9045991W/7MmjS3/I6b63hyhQAAEAKLKYAAABSYDEFAACQAospAACAFFhM\nAQAApFCz1XwAUGyxqr1/+MgBbvydZx4s3mRQ8QZtP8aND5/2Ezf+/Op2Nx6++aPM5hRz3Nf+w42P\nm/CVyBH1Xc2XajFlZnMlvSdpvaSOEMK4LCaVhYam5uhYUmsEa/Av1hW6ieOapfOjY0nvGZtHUvuD\nPlttkzAPv0S+EuSdR2FDQS0QYo6ffm10bIe+vaJjlxx1WnTsqV73ufGPJrQ/SHLpPedGx8Y37BEd\ni+XRs3f9PnpMbONTKV5GDQD1LIsrUweGEJZk8D6ob+QRAKAq8cwUAABACmkXU0HSfWb2tJlN9F5g\nZhPNrNXMWkNHdrdmUFPIIwBA1Up7m2//EMJ8M9ta0nQzezGE8FDXF4QQJkuaLEkNfVtCyvOhNpFH\nAICqlWoxFUKYn/v/IjO7VdK+kh5KPgr4IPIIlSL2wH5MrIjk5
2ce6sYnfD2e1oUWufRUrPAm66Kb\numKmhqbNi1T6D9/ZffmQ7XZy4+MnfN+Nx6pCSyGWF63Tri/xTKpDwYspM9tSUkMI4b3cnw+TVFip\nUhF0rF0VHdvuY0dGx9a0LXPjPz79sOgx3/62vyGklFw5uO2+n4uOvfWXu6Jj0XOt88topeQfEuX8\nplnKPPqXZx9w41MO/Ub0mPOfvDE69vDWj0THnrx5nhtvu/7/RI8Z8O9PRMeSKvZuWfSX6NgxLfkX\n2CZV7Hk/ODYq5QbiAFBJ0lyZGibpVjPb+D6/DSHck8msUE/II2Siklu1oDqQQyhUwYupEMJrkvbM\ncC6oQ+QRMkaLDaRFDiFvtEYAAABIgcUUgFqR2GKD9hroAdq0oCDszQegViS22KC9BnogvzYtWw4l\njyCJxRSAGpFFi41YZWtjc283Htsj8tiv+pvEJlVDlquqts9WI9z46sVvlngm5ZdvDg3eeqgOP+2k\nzeIP3/+S+/q1q9bkNZ91a1fm9fok+bbAiMV7Dxrmxn9w9glu/Ozv/lcPZlf9qmIxFftG1tjcJ3rM\n+vZ40q5cEt/0d8XcZ934KRMfix5TqLNP+mh0rPHkfd34iSeeEz3m/bbCnpls6r2lG09qL1GN1kd+\nhxww3O/9Ikm//KeLomPt/+jvAC9Jp3zuTjd++uEXRI/R2IOiQ0k/aA/8xYnRsXcbGt34wLNnxueR\nYIuBQ6Nj77+72I2XomVCpbdqQeUjh5BGVSymAKAbtNhAWuQQCsZiCkDVo8UG0iKHkAbVfAAAACmw\nmAIAAEiB23wA0I1Y1V6+kh7G/+dZ97vxvZ78sxv/zsnnZjKnWNVerPKQPRj/bvmixfr9ZVdsFt/r\nmOPc18+8+Ya83n/py61ufODID0ePaZvvVxKuvOILbnzLiX/Ia07tK/39ayed+d9uvN+wHfz5LJyb\n13krHVemAAAAUqiKK1Ox3woL/W1x2ZzCysKzNvGkSeWegqR4C4RYXxKpfD1xutOy237RsV/FBp55\nMHrMCZGrBZJ01p/jrQzOisQ/8qP/iR5jd/0+Opb0+R406W/RsaS+RoVYszTeVqTv0JFuvB77FQGo\nL1yZAgAASIHFFAAAQAospgAAAFKoimemAKASbTVqnBs/+Eh/O6hpl0Sf3NNfz7/KjZ//tH/Md7qZ\nW1pU7fVACO7zjPlW7Q0d/Qk3/ugIf1vAUdNfiL7X49u+4cYvWLFrXnOKyTcvYlV7uxx8jBt/Zcat\n+U6pInBlCgAAIAUWUwAAAClwm28TldKorhLmUantD5IsffmpvI/pPWhYdGzquM/GD0z4WsS+fk9+\npV/0mKF/2S46ltTgbv7XB0bHxrbu7saXvPhE9JhCv+7vv7u4oOMAoNpxZQoAACAFFlMAAAApcJsP\nAAr0Dzvv6Mb/bZrfA/+W3vE91Z6+6bdufJs8u9g3Nvd241ntLxh7/5iszltNxk34iht/bsYDbvzG\n165x418ec54bnzJlVPTcHz/xHH/gzJ9Ej/F86FNHufHXHr4tr/eJWfjic2582JhP+a+f/XAm5y0W\nrkwBAACkwGIKAAAghW5v85nZ1ZKOlLQohDAmFxsi6XeSdpA0V9KEEMLy4k2zdPpuNcKNJ1VTFUNj\ncx833txvSPSYtSsWFms6m9liQIsbX9/op1Sp8qiQSrRifN5+/IK/QfKQ7wyNz2P5OwWda8R170bH\nvnrm3m78/t7xqsK3Z95X0DwKuZ0T3YjZrKA5AEA59OTK1FRJ4zeJnSVpRghhF0kzcn8HkkwVeQQA\nqEHdLqZCCA9JWrZJ+ChJ1+b+fK2kozOeF2oMeQQAqFWFVvMNCyEsyP35HUnxrodAHHmEqvbyg/e6\n8T1/e7Eb7zj68uh7xW55/tOZ33LjUy74mRsvdvXcho52Nz5w5Gg33jb/pYT3qs39/1qnXZ/X63c5\n3r8dP/nr/t6Pex/xb9H3auq9pRvvN8yvPF0xb7Ybz6pqL6Zt/stuvKmP/whCUhVpJVSMpn4APYQQ\nJIXYuJlNNLNWM2sNHeX/gFGZyCP0hJldbWaLzGx2l9gQM5tuZq/k/j+4nHNEZSOHUAyFLqYWmtlw\nScr9f1HshSGEySGEcSGEcdaUX38S1DzyCPmaKp69QzpTRQ4hY4Uupm6X9I3cn78hqbjXA1GryCPk\nhWfvkBY5hGLoSWuEGyQdIKnFzN6SdI6kCyVNM7MTJc2TNKGYkyylVYvfKNm5rCG+ll23ui2veKm1\nr9z0e1GnsGG9G6+3PLryn37qxod2xFs3vPHEnQWdK9peQNIt1/jr0zVL50eP6bPVNtGxpM2MY1/7\npHYV0WdmQvSOr6dHz96Z2URJEyVJvfznSlC3evz8JnkET7eLqRDClyJDB2c8F9Qw8gilEEIIZuau\nxEIIkyVNlqSGvi15rdZQP5JyKDdOHmEz7M0HoNotNLPhIYQF3T17l7XYleK+CVV7MbGrdHff7e9h\nVi6xK42xqrAqkWkOxe46xD532924yn+faWfkfe6Otf57JVVVlsM5Lz3kxs+NvH6rUX5loyQtmzPT\njRfSxLlQbCcDoNrx7B3SIoeQCospAFUj9+zd45J2NbO3cs/bXSjpUDN7RdIhub8DLnIIxcBtPgBV\ng2fvkBY5hGLgyhQAAEAKdXllKqmUvKGpOe/3iz3w151SPhyXJN+HJRPH8itpr1lzH7ndjf9x/bPR\nYw5v3CM6lrSVQmx7D0lavfhNN96r74DoMU//9szo2O6Hfzc6du6Lf3bjZ4/6VPQYAKgFdbmYAgBP\nIb9YFNuCWTPKdu5ii+0jV+gvqJVi69GfdOOLnn/UjcfyK8u8K9c+iFsMaHHj5+766bzeZ9Ui/xdD\nKb4vZCkrTLnNBwAAkAKLKQAAgBRYTAEAAKTAYgoAACAFFlMAAAAp1GU1X1JVQ7kqHrKQ1PIh6eOq\nlBYN9SCp/UGSk1v/GB27+Pn/jY6tX7rAjd/4+R9Fj+l/Xby9RVPvftGxWmiBUOx/C4X+Gy2mWNuN\nttv/rxvfcvz5mZ272qv2YhbOftiN9x40zI2vXbHQjZ8w63433svi5/6fPQ9KnlxKsQrM5T/5hBvf\n9Vp/smuW+x/z8tf/5sbbVy6Lzin2+StldS5XpgAAAFJgMQUAAJACiykAAIAUWEwBAACkwGIKAAAg\nhbqs5iulpOqdpMqodavfdeNJVQjVXIlYjWKVIlJh1SJJuTL14K9Hx574/ero2N4/8iuypux9SPSY\ncP170bFt9/1cdOytv9wVHas34yZ8xY0/fdMN0WN2P+JYN77bKH9vs9nP+RVMF99+jhs/ss+ebnx9\n+1o3vu237nDj/Yfv5Ma3HDrSjb/zzINuPElzv8FuvH3l8rzfq5K837Y4r9eftcTfGP2ilnhVcN/I\n12GXT/j7BV50m1/Z+82xp7nxd555wI0P+p5fwbjrQUe68dg+hTsfeLQbn/PAH9y4JI0e/0U3Pn7/\n7d34JT+8OPpeheLK
FAAAQAospgAAAFJgMQUAAJACiykAAIAUWEwBAACkQDUfAGSsddr1eR/z8oP3\nuvHn71npxo89/RQ3fuR0v2ovJlZFGtsjLWbozmPyen9JChvWu/Gqqdozcz++WGV1vlW+ow75V39g\nbML+e4vfdMMjRgxw4x9/1N//b8En/yVxbpva7ZDPu/Hn7v59Xu+zeM5Lbjy2r6EkvfLQfW78xT+t\nceOx/Shjla090e1iysyulnSkpEUhhDG52CRJJ0naWOf5gxDC3QXPQtmXmVeKhqbm6NjO+x8cHXv1\nsQfd+PttS1LOqDxKlUexsmBJWh35JlOorPMyqbVFU3Of6NhP75oUHbu3udGNFzr3xS8+GR0bOPLD\nbvzdN18o6FybKlUOobaRRyiGntzmmyppvBO/NIQwNvcfSYfuTBV5hHSmihxCelNFHiFj3S6mQggP\nSVpWgrmghpFHSIscQhbIIxRDmgfQTzWzZ8zsajPz29VKMrOJZtZqZq2ho/D7kahZ5BHSIoeQBfII\nBSt0MfVrSTtJGitpgaRob/YQwuQQwrgQwjhr8h/6Qt0ij5AWOYQskEdIpaDFVAhhYQhhfQhhg6Qr\nJe2b7bRQD8gjpEUOIQvkEdIqqDWCmQ0PISzI/fUYSbOzmxLqBXmEtGoph9atbnPjfbbaxo1Pu+RX\nbjy2SfCrd57nxpet8dsT7HPMf7jxjrWr3PhrD9/mxlt228+NS9KShOrQfLjV4GY9P77QPAqhajaY\nf+yux9x4y3V+64JY+4DlD/oX7RpWLXXjO73xjhtfONvfGHnlwtfd+LuXH+rGJenspaPceHxD4+y/\nZj1pjXCDpAMktZjZW5LOkXSAmY2VFCTNlXRy5jOrEbFvPJLUf0i83H3d6neLMZ2yKVUerVk6P+1b\nfECltOxom/9yfPBm/4eYJG23z/5ufJeDj4ke88qMW6NjSa05CmnbEf38Oj8I+V6ELJBHKIZuF1Mh\nhC854SlFmAtqGHmEtMghZIE8QjGwnQwAAEAKLKYAAABSYDEFAACQAhsdA0AFGLyjv0HxinnP5vU+\nsU2CP3SEX5235dDt3HhS8Uw+lr8W3zA5q/0cm/sN2SzW0eDvS1mvVszLr9A1Vs139oDd3fjxn9vZ\njS+ct21e541tNtw+55noMSc+foMbv0T98jq39zF3WM+uOXFlCgAAIIWKuTJVyjLzSvHkb3+T6fv1\nHjQsOrZ2xcLoWKw8vRq/JlnPOen9Gpp6Rcey7j3T1HvL6Nghx54VHbOdP+PGGx+fET3mkl/+KDp2\n5hmXRMe++cQdbvyqsQdFj4l+fkOIHgMAlYYrUwAAACmwmAIAAEiBxRQAAEAKFfPMFADUukHbj4mO\nzfv2ADc+4Hv5PQcYewYytvdfvlVe+YpVZ0n5V+3FeFsZhfUdmbx3JUrKo4ZezW78xBMOduO/uPh6\nN75m6dtufMqhJ7rxy+a9Gp1TFlou9+cjSRs6/Kq91rsudeNfuczfF/C1R/+0Wcx6uMcjV6YAAABS\nqIorU1lXm40e/8Xo2PP33JT3++1+xLHRsefu9nfklqQrJk+Kjp323cvdeKyHjJRcsZdkny96W1VJ\nrdP831iqVdZ59NEJx0fHCqnUXDHJ7zMkSYPPi/caWv5Dv++LJA254EU3bgk9eL7/PT/3JGl9+5ro\n2IlH+n1mNtx5f/SYqxMq/QCgWnBlCgAAIAUWUwAAACmwmAIAAEihKp6ZAoBKFOuCHzasd+Pvr1wW\nfa/jX/+EG+/V19/bLladF3sGsLnfYDee9BxmufQfvpMbf29BcSvGyiXf5zm36O9/LSVp0PChbvzS\n837lxvPdg3HAiFFuPKuvTWy/xtVL50ePie04cdGMV9z4S9Nv6fF8Yv+WN8WVKQAAgBRYTAEAAKRQ\nFbf5st68tpD2B0mS2h8kbYZ7ysRJ0bGVv/68G189uzV6zNa/XBAdS7J1i7+JbtLco5c+e9jgrByy\nzqOXH38mOha7VJ3UpHDI+c8XNI9BCW0T7rjxv9z4j37n3zqSpOen3xsdmzP9suhYY+RLP2Drj0SP\nAYBawJUpAACAFFhMAQAApFAVt/kAwMxGSrpO0jBJQdLkEMLlZjZE0u8k7SBprqQJIYSSlKjFqohi\nYvudSdLdv5qcdjqSpC0GtLjxjrUrM3n/mF59/b0FY1WHSZ7Zb54b3/EPeb/VZioxj/J9BGHhbH9v\nuc6xtLPpFKv+PO7o0W78ktY/uvF8K16fuuYUN/61q+OPuMz8w21uvG9zfKcHjztX9uYDUGM6JJ0R\nQhgt6WOSvmNmoyWdJWlGCGEXSTNyfwdiyCNkjsUUgKoQQlgQQpiZ+/N7kl6QNELSUZKuzb3sWklH\nl2eGqAbkEYqBxRSAqmNmO0jaS9KTkoaFEDaWsr6jzts3QLfII2Sl22emKvH+clqxe/uSNHS3j7nx\nt2feV9C58n2mYqNdp3S48fmthbU/SJLVsxqSpBDccC3mUe+B/rMpknTfhX5riz3Gnx49JilXYs/B\nSNL7bUuiY9+65BE3ftjkM6LHzBp7UHRsp4NPi45FjTkg/2MSmFk/STdLOj2E0GZdnmkIIQQzc5PQ\nzCZKmihJ6uW3A0H9II+QpZ5cmeL+MrJAHiE1M+ulzh+A14cQNu4JsdDMhufGh0ta5B0bQpgcQhgX\nQhhnTb1LM2FUJPIIWev2ylTusueC3J/fM7Ou95cPyL3sWkkPSvp+UWaJqkceIS3rvHQwRdILIYRL\nugzdLukbki7M/d8v7SmCpMa2nsbmPtGxQqrePElXKvOxw/5fcONzH7ndjWc1f0na8Q/+lfkYd2+7\nSBVWJeZRTCy/Cr3jkY/Yno2X/PDivN6n9+B/cOOrF7/pxkcd8q95vX+SqRf+PK/Xu5/XyN2WTeXV\nGqGQ+8tcEsWmyCMU6JOSvibpWTOblYv9QJ0//KaZ2YmS5kmaUKb5oTqQR8hcjxdThd5fDiFMljRZ\nkhr6tvRsiYeaRR6hUCGERyTFmr4cXMq5oHqRRyiGHlXzpbm/DGxEHgEAalG3i6ke3F+WKuT+MioX\neQQAqFU9uc1Xc/eXkx6UXDj7ITee1E4hywcvN5ofac1fSu5DnTn5bn+gKs2jpt7x57NWvPlCdGyf\nL85049dcfV70mF/c/WJ07O05C6Njk277cXTs0/dd7sZHtx4SPUb5f20BoK71pJqP+8tIjTxCLRq5\n73g3/sfG37nx3R/19yOrRG88Uf5f6HrK/eWuh1VYlSxW/VmKar6YWM+7WBXpRw470I0/cf11mc0p\n9ot/Ab/0F4wO6AAAACmwmAIAAEiBxRQAAEAKLKYAAABSyKsDej1Y3742r7gkbbP3YdGxQjdILqVK\neHivnAaMGBUda5v/cnSsY+2q6FjvQf6G8zM/+qnoMWe19I2OfWnH+ObDJ2//ieiYDstuawYAgI/F\nFAAUaPWKpW58jzlD3HjYEP+lLF/N/Qa78YamZjfevnKZG49VhpWzYixfQ0dv/
gvF4jfuL8NMkvUb\ntoMbv+qvfvXnSft82Y1n2Y6nZbf93Phpt/+3G7/44IluPFbNt+Tt/OYa+0V08A5joscsmDUjr3PE\n5LPH46a4zQcAAJACiykAAIAUWEwBAACkwGIKAAAgBRZTAAAAKVDNl4Fqbn8gVV8LhF59B2h4pB3F\ngll/ih4X+ziT2h8UqmPtSjd+wX3nRo/pf8aD8TdMaMOA8ln8/GNlO7c1NLrxD33Mb7/x/D03ufGB\nIz/sxlctfsONJ7UEKRfv69Cxxv83WE4rF85148dv41fUacGrmZ27//Cd3PgJt1zkxj++59Zu/N3I\nBu/jJnzFjXesy28/yrUr/E3dVy/1K2SzlGaPR65MAQAApMBiCgAAIAUWUwAAACmwmAIAAEiBxRQA\nAEAKVPMBQM5OnznKjb/659uKfu4+W23jxtvf8/fUi1WMxqr2Gpp6ufG2+S+58V59B7pxa1jjxqut\nKriYdtj/C2587iO3F/3csb3tthmzpxu/aMFn/HieWzOOP+9kN37s6zPd+FWR19920sVufMEzD0XP\nHatWL2VOspjaRGNzbze+vr2wDUoLbUkQ+wcRKxtNc65qs251W8W3o4iVrSe2P0jw3fPPiI79/IIr\no2P9t9nZjS+b43+Dq2RmNlLSdZKGSQqSJocQLjezSZJOkrQ499IfhBDuLs8sUenIIxQDiykA1aJD\n0hkhhJlm1l/S02Y2PTd2aQjB3+Ye+CDyCJljMQWgKoQQFkhakPvze2b2gqQR5Z0Vqg15hGLgAXQA\nVcfMdpC0l6Qnc6FTzewZM7vazAZHjploZq1m1ho6Crttj9pCHiErLKYAVBUz6yfpZkmnhxDaJP1a\n0k6SxqrzioP7BGsIYXIIYVwIYZw1+c9Gon6QR8gSiykAVcPMeqnzB+D1IYRbJCmEsDCEsD6EsEHS\nlZL2LeccUfnII2SNZ6YAVAUzM0lTJL0QQrikS3x47jkYSTpG0uxCzxFrgfCPp3/Ljd982a8LPdVm\n1ix9O7P38sQqTDd0+DXw7SuXF3M6mXIrmc3815Ygj6447ZNu/DOnDnXjfY6fUuipNhOr+H5p+i2Z\nvP8r4/3K8FENB7jxnx53vhtft5e/Wf2GJ+7Me059h450468c3ubGz93um2588vmX533ujbpdTJWq\njHTYGH+nc0la9PyjbrwY5f5bDPSTffXiNwt6v6Q5JrUyeL9tsRuv1vYHpcqja958PDr26FK/P86R\n2/v9dCTp6MH7RMeSvhZb9Pd3OF+32v/H3Z3LJv0sOhb7YShVZwuEBJ+U9DVJz5rZrFzsB5K+ZGZj\n1ZlXcyX5DWyATuQRMteTK1OUkSIL5BFSCSE8Ism73EAvIPQYeYRi6HYxRRkpskAeAQBqVV4PoFNG\niiyQRwCAWtLjxRRlpMgCeQQAqDU9quaLlZF2Gb9SUv6P4KOukEeoGNbg7sMZ24PzsQdfzvPty7/x\n6qby3V80tjFyUsFDseU1pxCKPBtpj8ENuufYPpvFfznmY+7rP/VYvJCk0sRyeItBW0Ze3+7GP3LE\neDf+1I3/68YL2R93zdL5bnzEdf6/N2v4efS9CtWTar6il5FK0prl8Q18s/4G9KFP+TvDS9JrD2e7\nO3xSxVeSSq7MK0SmeWQW/aa61+wnooed+Pkz3fhVBf5wGLT9HtGxYbv4Gwy3r/F3bpekeU/eGx3b\n0OF/owJn6uctAAAIpUlEQVQAlF9PrkxRRooskEcAgJrUk2o+ykiRGnkEAKhVbCcDAACQAospAACA\nFNibD0AdCgob1vf41UccsbsbnzJrhhu/Y9Vf3fjRgz8RPceoAw534w/98EA33rL/d6LvlYVyVu3F\nVNqc1q1aq7cff2mz+EWjP+O+ftrU/OYfK2D67rP3R4856tPbufEnz73ajX/vVH/vvOunnufGd/xn\n//Wxr02sai8mVrUXq/Ir5L2KUeDFlSkAAIAUKubKVNv8/Pq4pFFI+4NCNxiutRYHlaCxuY8Gjhzt\njg266qzocUN23tuNj9h1x+gxz0+PPx+//PW/FTQGAKgtXJkCAABIgcUUAABACiymAAAAUqiYZ6YA\noGRCyKsybMoF+e2p9tQb77rxjj6rose8dL+/LeXKMz+d17mryRYDWtz4+21LSjyTwry0vq8OaPOe\nxfTnf9AvTnfjJ785y40feMKlbnz5uvizuDc+MM+NX/iHrdz49yLv89UTznXjsX83xd6PMt+9JUuN\nK1MAAAApsJgCAABIoSpu88UuH1pDY/SYrJu70eKggphkjf7XfuCuO0QPW3Lzk378RT8OAEBPcGUK\nAAAgBRZTAAAAKVTFbT4AMLPekh6StIU6v3fdFEI4x8x2lHSjpK0kPS3payGE9vLNVDp3V78CL2l/\nsS2H+nuq7XDQaZnMqRKVo2ovyzwKYYPWt6/p8bmv//QEN/7ozx5x4++cuYMbP2uGRc+xXd9efvyr\nU5Int4nY3pUNTf77V9q+iaXGlSkA1eJ9SQeFEPaUNFbSeDP7mKSfSLo0hLCzpOWSTizjHFH5yCNk\njsUUgKoQOq3M/bVX7r8g6SBJN+Xi10o6ugzTQ5Ugj1AMLKYAVA0zazSzWZIWSZou6VVJK0IIHbmX\nvCVpROTYiWbWamatoaOyGwCiuLLKI63r+S0+1LaSPjNljU1q7jfYHYvdn5WkdavbIsfQrqAerV+7\nWsvmzHTHBl9Q4smgpEII6yWNNbNBkm6VtFsex06WNFmSGvq2hOLMENUgszzqtzV5BElcmQJQhUII\nKyQ9IOnjkgaZ2cZfDLeVNL9sE0NVIY+QFar5AFQFMxsqaV0IYYWZ9ZF0qDofGn5A0hfVWYn1DUm3\ndfdeYc3SJetmXbNxE7MWxTZTy9gm9U4fOG+JbzyW7GMu43m394KZ5tGqxUvWPPGLHudRrN5t9kM/\ndeN9/PCmPnjelZFX3XZGj94sD+XKoVKf282jTbGYAlAthku61swa1XlVfVoI4U4ze17SjWZ2vqS/\nSuq2BjyEMHTjn82sNYQwrliTjinXect57nJ+zF2QR1V83nKfO4bFFICqEEJ4RtJeTvw1SfuWfkao\nRuQRioFnpgAAAFLo9spUpl2HzdS4RR93aM3St/ObOapKNXWvRt2ZXGfnLee5y/kxF1u9fU7rMX+j\nenJlim6xyAJ5hIqUK3Wvm/OW89zl/JiLrd4+p/WYv0m6XUzRLRZZII8AALWqR89MZdZ1eN3qLOaM\nKkX3agBALerRYiqEsD6EMFadjcz2VZ7dYkMI40II46xX3wKniVqQWR419S7aHFFfzGy8mb1kZnPM\n7KwSnneumT1rZrPMrLXI57razBaZ2ewusSFmNt3MXsn939+aIvvzTjKz+bmPe5aZHZH1eUutXDmU\nO3dJ8ogc6l5e1Xx0i0UWyCNUglyfoV9KOlzSaElfMrPRJZzCgSGEsSXolzNV0vhNYmdJmhFC2EXS\njNzfS3FeqfMZybG5/+4uwnlLpgJy
SCpNHk0VOZSo28WUmQ3N7V+kLt1iX9Dfu8VKPewWi/pFHqEC\n7StpTgjhtVwF6Y2SjirznDIXQnhI0rJNwkep8xlFqUjPKkbOW2vIoU51n0M9adqZWbfYDe+9s6Rt\nxn+WfAuHbjCPbOcQa72fXdfhMm0F0o1KmEclzEEqbh5laYSkN7v8/S1J+5XgvFJn8cV9ZhYk/U8Z\nqpOGhRAW5P78jqRhJTz3qWb2dUmtks4IISwv4bmzVs4cksqbR+RQF90uprLsFlsJrfc3xTxKMwfy\nqD7mUEnzqHD7hxDmm9nWkqab2Yu538JLLoQQcj+MS+HXkn6szkXAjyVdLOmEEp27FlVEHpFDdEAH\nUL/mSxrZ5e8le2YvhDA/9/9Fkm5V6bcxWWhmwyUp9/9FpThpCGFhrhBlg6QrVf3bt5Qth6Sy5xE5\n1AWLKQD16ilJu5jZjmbWLOl4SbcX+6RmtqWZ9d/4Z0mHSZqdfFTmblfnM4pSCZ9V3PjDN+cYlf7j\nzlpZckiqiDwih7oo50bHldLBlHn8XSXMIV+VMudKmEclzEGqnHkkCiF0mNmpku6V1Cjp6hDCcyU4\n9TBJt5qZ1Pk9+LchhHuKdTIzu0HSAZJazOwtSedIulDSNDM7UdI8SRNKdN4DzGysOm/RzJV0ctbn\nLaUy5pBUwjwih7pnIZTqNicAAEDt4TYfAABACiymAAAAUijLYqqc7fc3mUfJtnTY5Lxlac3fgzlU\nZJt+DzlU/hxKmEfV5BEAZKHki6kKab/fVam2dOhqqsrTmr+7OUgV2KZ/U+SQpMrIodg8pCrIIwDI\nSjmuTNVF+/0k5WrN34M5VAtyqAJyKGEeAFBXyrGY8trvjyjDPKS/t+J/2swmlmkOG5WzNX9Xp5rZ\nM7nbN0W/TVQgcshXKTkkVUceAUAm6v0B9P1DCHur83bRd8zs0+WekNTZml+dP6RL7deSdpI0VtIC\ndbbpRzJyaHPkEYC6Uo7FVFnb73dVAVs6dFWW1vxdVWqbfgc55Ct7DklVlUcAkIlyLKbK1n6/qwpo\nxb+psrTm76pS2/Q7yCFf2XNIqqo8AoBMlHw7mTK33++qpFs6dFWu1vw9mENFtunfFDlUGTmUMI+q\nyCMAyArbyQAAAKRQ7w+gAwAApMJiCgAAIAUWUwAAACmwmAIAAEiBxRQAAEAKLKYAAABSYDEFAACQ\nwv8Dv3SnAlPHf3wAAAAASUVORK5CYII=\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "l_cc_phm = phm_list[9][2]['L']['CC'][0]\n", - "l_mlo_phm = phm_list[9][2]['L']['MLO'][0]\n", - "fig,ax = subplots(1, 4)\n", - "fig.set_size_inches([10, 8])\n", - "ax[0].imshow(l_cc_phm[:,:,1], cmap='RdBu_r', vmin=0, vmax=1)\n", - "ax[1].imshow(l_cc_phm[:,:,2], cmap='RdBu_r', vmin=0, vmax=1)\n", - "ax[2].imshow(l_mlo_phm[:,:,1], cmap='RdBu_r', vmin=0, vmax=1)\n", - "ax[3].imshow(l_mlo_phm[:,:,2], cmap='RdBu_r', vmin=0, vmax=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAlMAAADvCAYAAADW1lHPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmYXVWV9/HfSlUqA5VASCCGEAlCABEk0BFoZQYVHEBe\nlQZppJWXOAC+Dq0i2oo29kPbAi0GoYNEEFHABiTQyCCDCLZAsJkHCRCGEJKACSGQqVLr/aMu/YTc\ntW/uuefcsb6f5+Ehte4Z9rm16tauc/ba29xdAAAAqM2QZjcAAACgndGZAgAAyIHOFAAAQA50pgAA\nAHKgMwUAAJADnSkAAIAc6EwBAADkQGcKAAAgBzpTAAAAOXTn2dnMDpb0I0ldkn7q7qdX3L57uFtP\nb55TotHM0q8Fs+f76uXyvpUVdopOQR41y7BRmyRfmzJhdBgfumJJcp8HF/eH8e17lif36d5iq7LY\ns88+q5dfeilTHgFAs9TcmTKzLknnSHqvpOcl3WNms939keQ+Pb3q3v7QWk+JJrAh6ZuX3l/+i7Pv\n8dnZjk8eNdXkfQ9Lvjb7nw4I4xPuvyq5z9bnr4iPtcUfkvuM/d75ZbF9935PcnsAaDV5HvPtLmmu\nuz/l7qslXSop/ckMxMgjAEBby9OZmijpuXW+fr4UA7IgjwAAbS3XmKlqmNl0SdMlSUM3qvfp0KHI\nIwBAq8pzZ2q+pEnrfL1lKfYm7j7T3ae5+zTrHp7jdOhQ5BEAoK3luTN1j6QpZra1Bn75HSnpE4W0\nChuUdWB4CyOPMuoenr4zN36nfcL4v39xrzB+7JfOSx5ru4OuztawCrZ/rCf5mt12UllszePPBVsC\nQGuquTPl7n1mdqKkGzRQ0j7L3R8urGUYFMgjAEC7yzVmyt2vk3RdQW3BIEUeAQDaGTOgAwAA5EBn\nCgAAIAc6UwAAADnQmQIAAMih7pN2oj4aNf1Bm02z0FH2+tSnwvicq9Jj9efP+W0YP+KTN4TxWr6/\nRU/LEe4TLKINAK2KO1MAAAA50JkC0PbM7GAze9zM5prZyc1uD9oTeYRa8ZgPQFszsy5J50h6rwYW\nyr7HzGa7+yPJfbqHu/X0NqqJrccSf0d75zzW99XL5X0rrdrtyaO0iauWh/GuxLv77LBR8QvR43uL\nD7L568vC+KIRiWMXIWiLr3q1qjyiMwWg3e0uaa67PyVJZnappMMkpX8J9vSqe/tDG9S81tPVE69v\nuXb1yga3pH76Hp+ddRfyKOFLc+8I42OGxp3yz24dL18VjY9MjcE8+v6bw/iPt98/jBdhSPfQstjq\nR66sbt+iGwMADTZR0rqL+T1figFZkEeoGXemgAKl/srq6hmR2L4reaw7fvazMB799bQhRVZlvuWd\n6b8MD7nwm2H8Z7sdlNynURWjZjZd0nRJ0tD0YtFAJeQRItyZAtDu5kuatM7XW5Zib+LuM919mrtP\ns+74MRcGNfIINePOFIB2d4+kKWa2tQZ++R0p6RPNbVJjpe5W9vetCeOpsVE9vWPC+OrlS2pr2Dr+\n7cl43M3XpuwTxpswx92gz6MRY7cI4wdu+7Yw3nfmr8L4aW/fPYx/c8reZbHU93n6p3cN4xe9sF0Y\nP/7GmWH8jHfsF8YjveO3LostfWJYVfvSmQLQ1ty9z8xOlHSDpC5Js9z94SY3C22GPEIedKYAtD13\nv05Semp4oArkEWrFmCkAAIAc6EwBAADkwGO+klRJey2DIE889UthfMapZ2U+FtrLxpN2DOMvfD8e\nTDny7y9OHmvoyNFhvG9lPBtxLWpZtHjBffFkepI0a+oBYXz0hG2T+yyb/5fkawDQDuhMARh8zCp2\nJNfXhMqyTLqHx0uaZK3CK6JqL+Wr28SzYt97zRlhfK9P/ltZbMXLLxTaJrzZo7+OlyPc6agzw/jy\nQ78SHyio2stq53s2CeOfvu+8MH7a3MvC+IxPX1EWW7XspXDb1a+9UhbztWtTTXwTHvMBAADkQGcK\nAAAgBzpTAAAAOdCZAgAAyCHXAHQzmyfpVUlrJfW5+7QiGtUMWQeYVlps9muPxdPaz0ws1TBw/niQ\n203d94bx/ZZNqdC69tJueZRackOSXnnukTA+8u8fCuO/WRJ/fyXpd6f+IowP+/hhyX3Wehy/5Rvn\nhvEHrokHbUrppSUqDQJO7UPFXn3Vc+B4ViM3mxTGh2+8WRg/8PMXhPF2GGy+0dix2vXoT5bF7/n1\n5eH2qWV8Gi1VfDHt85eE8eUL59WxNbFL5v8pjG+8/dgwfu7CzcN4arB5vO3ispj391W1bxHVfPu7\ne/WtBWLkEQCgLfGYDwAAIIe8nSmXdKOZ3Wtm04toEAYl8ggA0LbyPubby93nm9nmkm4ys8fc/fZ1\nNyj9chz4BTl0o5ynQ4cijwAAbSvXnSl3n1/6/yJJV0naPdhmprtPc/dp1j08z+nQocgjAEA7q/nO\nlJltJGmIu79a+vf7JH2vsJblkKpU6OoZkdynb+VrYbxS1V7KWy+NjyWl4mkHdMdrvdmQ9BT3qaUl\nKl3/yqULszWsIK2cR6MmbBPGh3T3JPdJVVWN3S4uUBz6z6cnj3XxP14UxlfscXjm8684Nl6a4aR/\nviV5rN7u+OdoRJcl91m8Ks7LnybW7Gsa95ZfIiay9Fvx58Emp8VVpPU0boc9wvjYSRPD+JN/uKGe\nzWmKvjX9evnFV8virVK1l3LK6V8N42f+a3qt0EgRa9p2D4+fNIybOCqMX/vgojDete+B8Ql22q/q\ntoTt9kSJ9HryPOYbL+kqM3vjOL909+tzHA+DE3kEAGhrNXem3P0pSbsU2BYMQuQRAKDdMTUCAABA\nDnSmAAAAcqAzBQAAkEMRy8kAQNNlWuPRLKxEaqUKv5MevLUstuX5kxvfkISXHrsrjF9/xllhfI/f\nXxfGW6nyLes6oateXaq5t17diKbVLMrzY2/6frjtTyfsH8ZT6yQW8fMyasK2YXzfHxwdxq/e/qgw\n/tra5v7stnxnqpbSy9RrqekPKp2nv29NhdbVXy3nTy2afPTn/y65zwX/cnbm87ST1Pe3d/zWyX1e\nXfBk5vOkynxf/sucMP6dK7ZPHuuV5x7NfP6UjY6cFcaPT89yoBO/vl8YH7drXJ4vNadEfz2s8Yi8\nyCFkxmM+AACAHOhMAegUrPGIvMgh1KTlH/MBQJUqrvH4pvUde1jfEaFs64SSRyjhzhSAjrChNR7f\nvL5jemklDF7Z1wkljzCAO1MA2l7mNR5baG2+1Pqf5+z6vrJY/8J5mY6916c+FcbvvChe97GI92Ta\nB78UxkeM3SI+59q4YKbR64XWtE5oG+TRl+6/qSy2Xf9+4bY25M4wnlqHcfmL8+LjdHWF8agqcMnT\n94fbDv94HL9uh7Fh/I9PLQ3jjdLynalWSdRm6eoZHsYrlROnqhY7vWKvktTixLVU7FWy5PpTw/io\n/eKFRedcfkmh509ZeusPwvjovdPVov/xX3HcfvtYEU0q
Gms8Ii9yCDVr+c4UAGwIazwiL3IIeTBm\nCgAAIAc6UwAAADnQmQIAAMiBMVMA0ER7HvWJMP7Hi+OKu8iSO88J41sd8aNMbZk47ZAwPn/ObzMd\nJ7LqlcVhvNlLdnWK/9lpQRif2n9AeTBR2PWOD3wsjM+75+4w3rdyeRj/0PRjwvhvzj4vjGdxxKRD\nw/iK3hfjHTKs/RguO2YV1txaB3emAAAAcmiJO1PDRo9LvrZqWWPWmxw2erMw3qi5TlIL5FZanBlv\n1tM7Jvla6i+oWozfae/ka/13XB7GU3cOxrznhMzn/7/33ZJ8bURX/FfU6L3juX9qUWm6klqm8gCA\ndsedKQAAgBzoTAEAAOTQEo/5AKDTTZh6YBjPMtA8ZdO9TwrjWVeQKGKgeQoDzYvxwG5/DeMn73lK\nGPf7ZlZ97IeujYcpZHX1jOrPmdVri58N46lcHzpydFlszevLwm17ejcti/UNiZfGWR93pgAAAHKg\nMwUAAJDDBh/zmdksSR+StMjddyrFNpV0maTJkuZJOsLdl2zwbGbhytarl8e3LRtp1bJ4DpQfn/ft\nMH7bY/H2knTrtXeF8SVP3Zfcp8iqvamHHxnG77vq0sLOkVUj8qgS749Xpq/FokfildUlaeNvxbea\nV1wxLYzP/WD6PHs8Ee+jCtV8M3aJHyWl5pWp5PaxT4XxTaekq293vjv732fh97LKuV0AoBVU88l3\noaSD14udLOlmd58i6ebS10AlF4o8AgB0oA12ptz9dknr3zo6TNIboyYvkvSRgtuFDkMeAQA6Va3V\nfOPd/Y2561+UNL6g9mBwIY/QcbbZ97Aw/uTvr67bOUdN2DaML5v/l7qdM6uX7ognrh23V/aJaweD\nt+75oTA+96R3h/Hrjo6r+bJITbrbPbw3jE87PG7jXZf9OowXMZxl8e0/DuPzX42rRT/wTzeUxV74\nc3lMiicJ97V9VbUr9wB0d3dJnnrdzKab2Rwzm+N9zIKMGHkEAGhXtXamFprZBEkq/X9RakN3n+nu\n09x9mnXHvV4MWuQRAKDt1fqYb7akYyWdXvp/VfevzYaoq2dEWTw1gVbFY0WrO5ekJu8avkn6KdKl\n534xjH/iCz8J42teeyV5rFqup0ipqr1a3rM6qymPBqr5esrCa1evSO6Sur5a3pPfLJmT3OegX38/\njJ++01FhfPPbbk4e6+XPfDeMX7DbQcl93rb3h8P4wsfuT+6TcuENcdXgH6Z+JbmP91+V+TzhZI6e\nvEkJAC1ng3emzOxXkv5b0vZm9ryZHaeBX37vNbMnJB1U+hpIIo8AAJ1qg3em3D3+k1pKTGgDlCOP\nAACdirX5ALSNQid/zSla80uqb9VeStaqvVTVliXWISuiCitVtffa9d8K4xsdfFruc0ZaKYekxKS1\nkvpWrw7jR39hRhgfs/UuYXzJ09U/4l+7Oi7uScV3+cqxYXy3xJy7P955/6rbkvK13reH8aOfjIdf\n1HO9yXWxnAyAdnKhmPwV+VwocggFozMFoG0w+SvyIodQD3SmALQ7Jn9FXuQQcmnomCl3V39f/Bw4\n87FqKOWvVDr/kaO+EcZTYwsqXUeq3L5J0w+0zPmLYkO6NHSjjcviK15+IfOxanlPzvhyPGZBkv7+\nn34fxv9hdbzQ8ufujafekKRvbrVTGF/6zEPJfVLjdUZP3C6Mf/L49ErLMxbOi1+4Ofv0B43i7m5m\n4bwKZjZd0nRJ0tCNGtkstJFKOSSRR4hxZwpAu6tq8lcmfkUFTCCMXKjmA9Duapv8NafhG28Wxps9\naW813vLOuKrqvQfFa/xdeHq8HloWqbv8R167OPexC9CUHJKkRZ/fPIyPO/vGTMep5c58XvtvNjKM\n7zb9b8P4j6/J/3Rko664VHDcyPKJnBuJO1MA2gaTvyIvcgj1wJ0pAG2DyV+RFzmEeuDOFAAAQA6N\nvTPl/cmZVBuh0liGEWO3COPdwcLMkvTqgicznz81ZqCS1PvVgosWN0zPyF69dbc9y+KP33RlQ85/\nx0UXJ1875aF44eIRw+MftS1mLU0eq78vHkty0oO3Jve5+avnhPFHrv/PMH7xBfGM1wCA6nFnCgAA\nIAfGTAHABkR3gmu5O51Xah23jSftGMafP/fDYXyPGfFafhf9IL6zWYTUXfb/Om9W3c7ZaqLv32Yz\nFgRbZrdJhnnprlh4d7jtqD/cFsYnbzIsjL/4d3F+vTjn6TAubZWIVy+1vt+MD345jEc/u6mnN+ET\nH0ssNLge7kwBAADkQGcKAAAgBzpTAAAAOdCZAgAAyIEB6CUrl7wYxod0FzdFfaVpIbJOm9Dp0x9U\nsmrZkoZNgxAZu927kq+9OOeGMP7+h/473uEfTs18/tfWJtdg1Z+P2zSMD78+3v6V5x5NHqtVF+xu\nhiKueZfDjgzj9199adXH+OvJU8L483t/Koz/aXm8IPsTt98SxlOfd9HC7oMxD+oh6/v4PzvHywbu\n9vAjYbynd0xZ7ILP/zTc9tfDXwrjJ/xuRRj/xWs7h/H+V+K8m7zXIWWxeXfMDrfNavxO+4TxFx+4\nrZDjbwh3pgAAAHKgMwUAAJADnSkAAIAc6EwBAADkQGcKAAAghw1W85nZLEkfkrTI3XcqxU6VdLyk\nN1ZiPcXdr6tHA7NMkZ9HqqKiZ1RcHZWq/qt0rEqauQB0IxSaR0OGqHv4RmXhvpWvJXdZcme8TMaY\n95ywwdOtb/Ejf0y+NvTKuDLlyT33DeNTPndm5vP/PLj2N8z6+MzMx0sZjNVaqara6OcztbRLf9+a\nMJ6q2hu52aT4nKvKK6gO+suu4bbz/vNXYfyoX58exrdJLIj9ykvxz9D8Ob8N40VIvV/tbOjI0WG8\nq2dE+bYjesNt+1bHFXTvfmGLMH7e0/Fi5k++Vv7+bvGDk8Jt7zw0rjj9RU9cwZz6zE39bBRVuRcp\nomovrmYtbjmZCyUdHMTPcveppf/q0pFCR7lQ5BEAoANtsDPl7rdL+msD2oIORh4BADpVnjFTJ5rZ\nA2Y2y8zKZwUrMbPpZjbHzOZ4X2c/ykJNMueR1sS3vwEAaIZaO1PnStpG0lRJCySdkdrQ3We6+zR3\nn2bd2Wb5RserKY80tHzcAQAAzVJTZ8rdF7r7Wnfvl3S+pN2LbRYGA/IIANAJalqbz8wmuPuC0peH\nSyq2tG4dRVbtpdYak9KVSytefiGMbzzp7cljrXxlcRhftSxe92iwqjmP+vsrVu5FPvzTOdkaJ2n5\n704L4+/49l3JfX76w1lh/NjEGmknnf+PyWN9acdovL404+yvJvf57PRTk68VpZafo0LO24DK4ixV\ntVmr0FJVe68vfq7qYzx8861hPPU59eOd9w/jd3xuzzB+4DFx9V8klQfv+MDHwvhD115e9bHrqdA8\nMgvfhzWvLws3j+L/PenBcNtdH9w8jM///Y/D+MR94xwYMba8+m/GsK5w2/f17xjGf/aTr4fxb/x7\nvMbjwoduD+O
RVB7ZkLiN9az+DH/+vbrPtGqmRviVpP0kjTOz5yV9R9J+ZjZVkkuaJ+kz1TYWgxN5\nhAJcKGmGpJ+vFz/L3X/Y+OagTV0o8ggF22Bnyt2PCsIX1KEt6GDkEfJy99vNbHKz24H2Rh6hHpgB\nHUC7q6oiFNgA8gg1ozMFoJ1VXRHKNC2ooMY8YpoWDKAzBaBtZakIZZoWpNSeR0zTggE1VfMBQCuo\nuSLUhoTr8NVzjcwsVXspqaq9lFSl1M/uejaMZ6k4TlVutkrVXha15pEN6daw0ZuVxVcuXVj1uVNV\ne5++L1Ep91pf1ceW4nVkjzvuO5mOsWRFXEGXWj8wi1Qe1bMyeNjocWE8T8V9R3amsi5AWotjjovL\n1iXpJ6fNKOw8jRKVz0rZP7wbwiz8Hlf6/j7/xMthfPgm49PneSKeAuGZP16T3GXq4fFCoY989PAw\n/u018eKkkvTgN34Txj87PS67rkXqQ0VKf7BU+pCr588eFaEoAnmEeujIzhSAzkNFKIpAHqEeGDMF\nAACQA50pAACAHOhMAQAA5MCYKQCDj/fXtXKvCN3DNyqLZV2TMlUscMG/nB3G37b3YWH8qT9cnem8\nkVRlYZaqraEjR4fx1Fp49efq7ytfdzNV2BJV+UVVpZI0a+oBYfzij2WrxOvp3bQslrVq7dvfuziM\nL184L4y/4wMfD+OPXH9FWayeVXspa15/pfBjdmRnqp4LIb5hxqln1f0cjRSVz7aqjcZsqqlHlFfN\n3X3pJcl95t0xO/N5ej+XrtpLue+qSzNtPzlR/SZJ/bvEH6ZFKnrx7eiDW8pWKg4A7YbHfAAAADnQ\nmQIAAMiBzhQAAEAOHTlmCgDa3U6HfLgslhqT980ffD2MT78nHmi+1a/jZUCevjMeJ9jTO6Ystnr5\nknDblCsuPi2Mf/SYb4XxaGDy2sTyJeHgdrPqG1cj7+8P25RlgHPWQohmDLY/5jMfCeO//NkNYfzR\nG+PVG4oYbL7JVjuF8aXPVLeSlFSfcdXcmQIAAMiBzhQAAEAOPOaDpObM9VGrla+v1Nw5j5bFW+Ea\normBpPT8QGPetkvyWC//ZU4hbZKk5Td9L4yPev+pyX1qeT+ZAgHAYMSdKQAAgBzoTAEAAOTAYz4A\ng49ZWAHWCo+K35BlNv3vf+1fw/i1h5WvFCBJm277lzC+5Kn7wviQ7p6q25Lyf44+JYyf8x/x8igf\nnDK2LDb5gC+E22667W5lsZefvjFD62rT1TNcoyduXxZf8vT94fbDRo8ri6VWIRiSWB1hy2nvD+PP\n/unaMF7EKgfH77lVGP/j/fEwhfuvLh+GUZRU1V4RyxWNnrhd+fmejpf7WR93pgAAAHKgMwUAAJDD\nBh/zmdkkST+XNF6SS5rp7j8ys00lXSZpsqR5ko5w92yzuKGtZZkor8g8Wrvqdf117p9ztLw6taxQ\nn6raS92yL7Jir5Le9347jKdWrJeyTyZYq2ZNuAgARanmzlSfpK+4+46S9pR0gpntKOlkSTe7+xRJ\nN5e+BlLIIwBAR9pgZ8rdF7j7n0v/flXSo5ImSjpM0kWlzS6SFM83D4g8AgB0rkzVfGY2WdKuku6S\nNN7dF5ReelEDj2+ifaZLmi5JGhpPaIjBJXce9ZBHg1GhQw7cW6pyr17uv7r6isBKRo6bWBZbtWxx\nuG3W9/WEz3w3jmc4xtJ5D5bF1q5KrONX8JCDVOVe5H2f/GhZ7JoZ/xFu29UzIozfPvGmMD656lZk\n966PfCOMp4YCpIY1jJqwbVnsleeKqfwr4ud52fzyKtf+Koc7VD0A3cx6JV0h6Yvu/qZBI+7uGkjK\nMu4+092nufs0666uxBCdq5g8ij9k0PF4VIwikEcoXFWdKTMbqoFfgJe4+5Wl8EIzm1B6fYKkRfVp\nIjoFeYQ8eFSMIpBHqIcNdqbMzCRdIOlRdz9znZdmSzq29O9jJV1dfPPQKcgjFKnWR8VmNsfM5nhf\nYyoV0drIIxSlmjFT75F0jKQHzeyN6XFPkXS6pMvN7DhJz0g6oj5NzC71vLa/b01yn57eMWF89fL2\nm+0h62K7tQqfUXv4lE4qMo8aNN5l9Jbls+FK0pKn0mMkbu19LIzffdrPwvjXvvAvyWPVco2vz/5K\nGN/8+CvC+GuLn00e6/2fPT6M33De+ZnbVUnGPCp7VGzrTKPg7m5myUfFkmZK0pCR49InwKBAHqFI\nG+xMufsdklKTvhxYbHPQqcgjFKHSo2J3X8CjYlSDPELRWJsPQFuo4lHx6arTo+Isa6p1okZMkru+\naNLc1IS5d155elnsmA/eHW7biDwaudmkML7LiUeXxa7ZYZ9w29S1To5vMtdVf9/qjNvHT4GKqtyr\nlzxr89GZAtAu2m7IAVoSeYTC0ZkC0BZ4VIwikEeoBxY6BgAAyKEl7kyFC52W1FLR1D28N4xXqsxr\nx6q9lKKr9garf7jqh2H8jHfsl9xn36XbxC+ceFrm86d+Lir9TIw89IzM50nZ5vOfCOOfr7DPT965\nf+bzbDzp7WWxpU/fmPk4ANAs3JkCAADIoSXuTAFAK2uVyr1a5tArwtzfnV0W2/agL9T1nKlqtsjf\nHlo+v1rf3OeKbE7MLLyDvOLl+eHmI7pSQ7Uaa49PHBPG7/rlxWF8+Ji3hPEVL79QWJtaQUPW5gMA\nAEA5OlMAAAA50JkCAADIoSXGTBW9zlpqttbUc2Ip/awYg9epN34vjH+3Jz0jbu8J1xV2/maPU6il\nMq8W0c+rV1ibDwBaTUt0pgCgoczCwdypgdzRION6L7Y9YuwWZbG+FcvDbWuZDiaLeg82b1fdw0Zq\n7HbvKounlt/53t8cXhaz5X8Nt03lV0/vmDC+5vVXwvjrv/hkWWyjT14SbpvSaQPN64HHfAAAADnQ\nmQIAAMiBzhQAAEAOdKYAAAByoDMFAACQQ9tW81VaHDm10G+jpj+oZYFatJ5RX7qlqedPVW41ytCR\no8N4lmU+qvHqgifLYv1rVhV6jjLumZZgacbP7nNHly89svlPVoTb/uu/fz2Mf7WGBbYbrZ0/L/tW\nvqaXHrur6u2LWJYoa4XmV/9SXv1302XfD7fdYdyIMD7pwC+H8XovY1SEcTvsURbL8j2rFnemAAAA\ncqAzBQAAkAOdKQAAgBzoTAEAAORAZwoAACCHDVbzmdkkST+XNF6SS5rp7j8ys1MlHS9pcWnTU9y9\nuFVeN6BSpUdXYiHa1ALIGzpeVu1QhdJorZpHlaTyyIZ0JfdJVZL2jp8cxt++/17JY91z6S/SjWuA\nId09YXz579IVYr0HfatezSk0h6x7aLj2XT3XIMtatTbu7PlVH7sdqvZ+9PSdYfz/bf2eMD5s9Liy\nWGr9uehnss/i97vQzyKzuq3bmPXzJ/XZ85PTZpTFZrRBFV7q52XjSTuG8aXPPBTGp+6zc1nsjLOO\nCrc9+w9PlcWuOPlPqSa+STVTI/RJ+oq7/9nMRkm618xuKr12lrv/sKoz
YbAjj5AXOYQikEco3AY7\nU+6+QNKC0r9fNbNHJU2sd8PQWcgj5EUOoQjkEeoh05gpM5ssaVdJb8x4daKZPWBms8ysfGawgX2m\nm9kcM5vjfStzNRadgTxCXrlzaM3rDWopWln+z6J4ElUMPlV3psysV9IVkr7o7ssknStpG0lTNdDL\nPyPaz91nuvs0d59m3fEzYAwe5BHyKiSHho5sWHvRmor5LIpnDMfgU1VnysyGaiDpLnH3KyXJ3Re6\n+1p375d0vqTd69dMdALyCHmRQygCeYSiVVPNZ5IukPSou5+5TnxC6dmzJB0uKR5KD4g8Qn5F5pD3\nrQkr9+q5TlynVfmm3qtUFeg/vuOQ+ECJtR7HbrtbWeyFP9+YaE1Qnebx+13oZ5F77u9r6n1MVZ9n\nPd/OH/xoWez+qy/NdIyU4ZuMD+Mrly6s+hg9veHTVL31XfuG8efuvaPqY0vSF390Ulnsb37+7nDb\n/1p5d1nslgXPVHWeaqr53iPpGEkPmtl9pdgpko4ys6kaKC2dJ+kzVZ2xAdaujsfUVFocGXXXMXlU\nSSrHVr6yOIzvvVP8YSRJt9z2b2H87Il7Jvf55pS9K7Qum76V8ULLRU9/EJWAJ8ra2y6H0JLIIxSu\nmmq+OySUfikeAAAI4UlEQVSVL18utcRcQGgP5BHyIodQBPII9cCtGgAAgBzoTAEAAORAZwoAACCH\nagagA8CgkKqUGtI9tCzWX+f1zaKB+bUURdRL6r1KtTFr2xfc97vMbWp19VrHr5Lb3n5vWWzTa4qp\nWs1StZeyevmSMD731t/kPrYkHTpqWlmsP7GO4SHdU8tiq618vb7IoOpMFZm0lSoDW7UEOvqF8IZh\nG28Wxuu58GsnSn3vU4uQnvmtcF5ASdKPEgudrq1QsTdxWlx+nvrF5P1rk8fa4aAPh/E7d3s4uc/3\nhsfnr3Sd4S/ZRFk7ALQiHvMBAADkQGcKAAAgBzpTAAAAOQyqMVMAUIt6DzaPtNJg82Zo1bGneTTj\nmsac/kTDz1lPUWGGJI152y5h/KXH7iqLpcaWzp/z2/Kge1Xt4s4UAABADtyZqlE7/tVU6a/rVYm1\n497yzv2S+7z4wG05W4RKarkzEf5lpfRiomtXr0ge65HrrwjjW9wzMbnPw5dtFca//tn0GoRTrtm4\nLLb06fivTwBoRdyZAgAAyIHOFAAAQA50pgAAAHJgzBQAAG0itfpGEeN49znu02H8T5ddFcZTS8Gk\nVtuIxu0u/fbO4bY7Xf/WML73PpPD+C3XPxDGI9ef9v4wvvPB8ZjTanBnCgAAIAc6UwAAADnwmG8Q\nqbQ4c2raBKY/6Ayp2/G1eH3xc8nXtj7gC5mPN6T7r2Wx/jWrMh8HAJqFO1MAAAA50JkCAADIwbzK\ndWeKMGTkOO/e/tCGnQ9vVukxX1Ezuvc9Plv9r79khRwsgTzqLFHlz+pHrlT/a4vrlkdmtljSM6Uv\nx0l6qV7naiGD4TrXvcat3H2zep5sEObRYLhGqYY8YswUgEFn3Q9HM5vj7tOa2Z5GGAzX2ehrHGx5\nNBiuUartOjf4mM/MhpvZ3WZ2v5k9bGbfLcW3NrO7zGyumV1mZj21NhydjzwCAHSqau5MrZJ0gLsv\nN7Ohku4ws99K+rKks9z9UjM7T9Jxks6tY1uRUy2P8kaM3SL52upXy6uw+izZPyePBrHKj5jXNrAl\nAFC8Dd6Z8gHLS18OLf3nkg6Q9J+l+EWSPlKXFqIjkEdoYTOb3YAGGQzX2cxr5P3tHJmvs6pqPjPr\nMrP7JC2SdJOkJyUtdfe+0ibPS5qY9eQYXMgjtCJ3HxS/IAbDdTbzGnl/O0ct11lVZ8rd17r7VElb\nStpd0g7VnsDMppvZHDOb430rs7YPHYQ8AgB0okzzTLn7Ukm3SvpbSZuY2RtjrraUND+xz0x3n+bu\n06x7eK7GojOQR2gFZnawmT1eKn44udntKZKZzTKzRWb20DqxTc3sJjN7ovT/Mc1sY15mNsnMbjWz\nR0pFLf+vFG/odXZqHpFD2a6zmmq+zcxsk9K/R0h6r6RHNfDL8GOlzY6VdHWtF4TORx6hlZhZl6Rz\nJB0iaUdJR5nZjs1tVaEulHTwerGTJd3s7lMk3Vz6up31SfqKu+8oaU9JJ5S+hw27zg7PowtFDlV9\nndXcmZog6VYze0DSPZJucvdrJX1d0pfNbK6ksZIuqOlSMFiQR2glu0ua6+5PuftqSZdKOqzJbSqM\nu98uaf1y28M0UOQhdUCxh7svcPc/l/79qgb+OJuoxl5nx+YROZTtOjc4NYK7PyBp1yD+lAYSCR1s\nxcsvJF/bbMd3l8UWz70u3JY8GtwqTcsxbPS4sljfkK56Nkca+MBcd8Xm5yXtUe+TNtl4d19Q+veL\nksY3szFFMrPJGvh8uUuNvc7BlkfkUAJr8wHAIOMD64g1bi2xOjKzXklXSPqiuy9b97VOus5W00nv\nbRE5RGcKwGA0X9Kkdb5OFj90kIVmNkGSSv9f1OT25FaaAPgKSZe4+5WlcCOvc7DlETmUQGcKwGB0\nj6QppeWMeiQdKWl2k9tUb7M1UOQhdUCxh5mZBsZYPuruZ67zUiOvc7DlETmUwELHAAYdd+8zsxMl\n3SCpS9Isd3+4yc0qjJn9StJ+ksaZ2fOSviPpdEmXm9lxkp6RdETzWliI90g6RtKDpcmAJekUNfA6\nOzmPyKFs10lnCsCg5O7XSYorJtqcux+VeOnAhjakjtz9DkmWeLlh19mpeUQOZbtOGxhb1RhmtlgD\nvTxJGifppYadvBznr8/5t3L3zepw3P9FHrXM+et57rrnEQAUpaGdqTed2GyOu09rysk5f9PPX5Rm\nX8dgPn+zrx0AWgUD0AEAAHKgMwUAAJBDMztTM5t4bs7f/PMXpdnXMZjP3+xrB4CW0LQxUwAAAJ2A\nx3wAAAA5NKUzZWYHm9njZjbXzE5uwvnnmdmDZnafmc1pwPlmmdkiM3tondimZnaTmT1R+v+YBp//\nVDObX3oP7jOzD9Tr/PVADpFDANAqGt6ZMrMuSedIOkTSjpKOMrMdG90OSfu7+9QGlXZfKOng9WIn\nS7rZ3adIurn0dSPPL0lnld6DqaWJ59oCOfS/yCEAaAHNuDO1u6S57v6Uu6+WdKmkw5rQjoZx99sl\n/XW98GGSLir9+yJJH2nw+dsZOTSAHAKAFtCMztRESc+t8/XzpVgjuaQbzexeM5ve4HO/Yby7Lyj9\n+0VJ45vQhhPN7IHSI5y6PSKqA3JoADkEAC1gsA5A38vdd9PAY6ITzGyfZjbGB0oqG11Wea6kbSRN\nlbRA0hkNPn+7I4fIIQCQ1JzO1HxJk9b5estSrGHcfX7p/4skXaWBx0aNttDMJkhS6f+LGnlyd1/o\n7mvdvV/S+WrOe1ArcmgAOQQALaAZnal7JE0xs63NrEfSkZJmN+rkZraRmY1649+S3ifpocp71cVs\nSceW/n2spKsbefI3fgmXHK7
mvAe1IocGkEMA0AK6G31Cd+8zsxMl3SCpS9Isd3+4gU0YL+kqM5MG\nrv+X7n59PU9oZr+StJ+kcWb2vKTvSDpd0uVmdpykZyQd0eDz72dmUzXwaGiepM/U6/xFI4fIIQBo\nJcyADgAAkMNgHYAOAABQCDpTAAAAOdCZAgAAyIHOFAAAQA50pgAAAHKgMwUAAJADnSkAAIAc6EwB\nAADk8P8BDPb3D9bHb8kAAAAASUVORK5CYII=\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "r_cc_phm = phm_list[9][2]['R']['CC'][0]\n", - "r_mlo_phm = phm_list[9][2]['R']['MLO'][0]\n", - "fig,ax = subplots(1, 4)\n", - "fig.set_size_inches([10, 8])\n", - "ax[0].imshow(r_cc_phm[:,:,1], cmap='RdBu_r', vmin=0, vmax=1)\n", - "ax[1].imshow(r_cc_phm[:,:,2], cmap='RdBu_r', vmin=0, vmax=1)\n", - "ax[2].imshow(r_mlo_phm[:,:,1], cmap='RdBu_r', vmin=0, vmax=1)\n", - "ax[3].imshow(r_mlo_phm[:,:,2], cmap='RdBu_r', vmin=0, vmax=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Extract features for all prob heatmaps" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "from skimage.measure import label, regionprops\n", - "\n", - "def region_features(region=None):\n", - " if region is None:\n", - " return {\n", - " 'area':0, 'area_ratio':0., 'area_ratio2':0., 'eccentricity':0.,\n", - " 'equivalent_diameter':0., 'euler_number':2, 'extent':0.,\n", - " 'eig1':0., 'eig2':0., 'major_axis_length':0., 'max_intensity':0.,\n", - " 'mean_intensity':0., 'minor_axis_length':0., 'orientation':0.,\n", - " 'perimeter':0., 'solidity':0.,\n", - " }\n", - " return {\n", - " 'area':region.area,\n", - " 'area_ratio':float(region.area)/region.convex_area,\n", - " 'area_ratio2':float(region.area)/region.filled_area,\n", - " 'eccentricity':region.eccentricity,\n", - " 'equivalent_diameter':region.equivalent_diameter,\n", - " 'euler_number':region.euler_number,\n", - " 'extent':region.extent,\n", - " 'eig1':region.inertia_tensor_eigvals[0],\n", - " 'eig2':region.inertia_tensor_eigvals[1],\n", - " 'major_axis_length':region.major_axis_length,\n", - " 'max_intensity':region.max_intensity,\n", - " 'mean_intensity':region.mean_intensity,\n", - " 'minor_axis_length':region.minor_axis_length,\n", - " 'orientation':region.orientation,\n", - " 'perimeter':region.perimeter,\n", - " 'solidity':region.solidity,\n", - " }\n", - "\n", - "def total_area(regions=[]):\n", - " areas = [ reg.area for reg in regions]\n", - " return sum(areas)\n", - "\n", - "def global_max_intensity(regions=[]):\n", - " max_int = [ reg.max_intensity for reg in regions]\n", - " return max(max_int) if len(max_int) > 0 else 0.0\n", - "\n", - "def topK_region_idx(regions, k=1):\n", - " areas = [ reg.area for reg in regions]\n", - " return np.argsort(areas)[-1:-(k+1):-1]" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def prob_hm_features(phm, cutoff, k=1, nb_cls=3):\n", - " fea_list = []\n", - " if phm is None: # deal with missing view.\n", - " for _ in xrange(nb_cls - 1): # phms depending on the # of cls.\n", - " fea = {'nb_regions': np.nan, 'total_area': np.nan, 'global_max_intensity': np.nan}\n", - " for j in xrange(k):\n", - " reg_fea = {\n", - " 'area': np.nan, 'area_ratio': np.nan, 'area_ratio2': np.nan,\n", - " 'eccentricity': np.nan, 'eig1': np.nan, 'eig2': np.nan,\n", - " 'equivalent_diameter': np.nan, 'euler_number': np.nan, \n", - " 'extent': np.nan, \n", - " 'major_axis_length': np.nan, 'max_intensity': np.nan,\n", - " 'mean_intensity': np.nan, 'minor_axis_length': np.nan,\n", - " 'orientation': 
np.nan, 'perimeter': np.nan,\n", - " 'solidity': np.nan, \n", - " }\n", - " for key in reg_fea.keys():\n", - " new_key = 'top' + str(j+1) + '_' + key\n", - " reg_fea[new_key] = reg_fea.pop(key)\n", - " fea.update(reg_fea)\n", - " fea_list.append(fea)\n", - " return fea_list\n", - " \n", - " for i in xrange(1, nb_cls):\n", - " phm_ = phm[:,:,i]\n", - " hm_bin = np.zeros_like(phm_, dtype='uint8')\n", - " hm_bin[phm_ >= cutoff] = 255\n", - " hm_label = label(hm_bin)\n", - " props = regionprops(hm_label, phm_)\n", - " fea = {\n", - " 'nb_regions':len(props),\n", - " 'total_area':total_area(props),\n", - " 'global_max_intensity':global_max_intensity(props),\n", - " }\n", - " nb_reg = min(k, len(props))\n", - " idx = topK_region_idx(props, k)\n", - " for j,x in enumerate(idx):\n", - " reg_fea = region_features(props[x])\n", - " for key in reg_fea.keys():\n", - " new_key = 'top' + str(j+1) + '_' + key\n", - " reg_fea[new_key] = reg_fea.pop(key)\n", - " fea.update(reg_fea)\n", - " for j in xrange(nb_reg, k):\n", - " reg_fea = region_features()\n", - " for key in reg_fea.keys():\n", - " new_key = 'top' + str(j+1) + '_' + key\n", - " reg_fea[new_key] = reg_fea.pop(key)\n", - " fea.update(reg_fea)\n", - " fea_list.append(fea)\n", - " return fea_list" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def get_feature_matrix_dm(phm_list, cutoff, k=1, nb_phm=1):\n", - " '''\n", - " Args:\n", - " nb_phm ([int]): nb of phm to use for each view. Use None to use \n", - " all images. It will generate all pairwise combinations between\n", - " the CC and MLO views.\n", - " '''\n", - " import pandas as pd\n", - " label_list = []\n", - " cc_ben_list = []\n", - " cc_mal_list = []\n", - " mlo_ben_list = []\n", - " mlo_mal_list = []\n", - " for phm in phm_list:\n", - " for side in ['L', 'R']:\n", - " cancer = phm[2][side]['cancer']\n", - " cancer = 0 if np.isnan(cancer) else cancer\n", - " for cc_phm in phm[2][side]['CC'][:nb_phm]:\n", - " for mlo_phm in phm[2][side]['MLO'][:nb_phm]:\n", - " label_list.append(cancer)\n", - " cc_fea_list = prob_hm_features(cc_phm, cutoff, k)\n", - " cc_mal_list.append(cc_fea_list[0])\n", - " cc_ben_list.append(cc_fea_list[1])\n", - " mlo_fea_list = prob_hm_features(mlo_phm, cutoff, k)\n", - " mlo_mal_list.append(mlo_fea_list[0])\n", - " mlo_ben_list.append(mlo_fea_list[1])\n", - " cc_ben = pd.DataFrame.from_records(cc_ben_list)\n", - " cc_mal = pd.DataFrame.from_records(cc_mal_list)\n", - " mlo_ben = pd.DataFrame.from_records(mlo_ben_list)\n", - " mlo_mal = pd.DataFrame.from_records(mlo_mal_list)\n", - " cc_ben.columns = 'cc_ben_' + cc_ben.columns\n", - " cc_mal.columns = 'cc_mal_' + cc_mal.columns\n", - " mlo_ben.columns = 'mlo_ben_' + mlo_ben.columns\n", - " mlo_mal.columns = 'mlo_mal_' + mlo_mal.columns\n", - " fea_df = pd.concat(\n", - " [\n", - " cc_ben, \n", - " cc_mal,\n", - " mlo_ben,\n", - " mlo_mal,\n", - " ], axis=1)\n", - " return fea_df, label_list" - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def get_subj_exam_dm(phm_list, nb_phm=1):\n", - " '''\n", - " Args:\n", - " nb_phm ([int]): nb of phm to use for each view. Use None to use \n", - " all images. 
It will generate all pairwise combinations between\n", - " the CC and MLO views.\n", - " '''\n", - " import pandas as pd\n", - " subj_exam_list = []\n", - " for phm in phm_list:\n", - " for side in ['L', 'R']:\n", - " for cc_phm in phm[2][side]['CC'][:nb_phm]:\n", - " for mlo_phm in phm[2][side]['MLO'][:nb_phm]:\n", - " subj_exam_list.append(\n", - " {'subjectId':phm[0], \n", - " 'examIndex':phm[1], \n", - " 'laterality':side}\n", - " )\n", - " subj_exam_df = pd.DataFrame.from_records(subj_exam_list)\n", - " return subj_exam_df" - ] - }, - { - "cell_type": "code", - "execution_count": 95, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(4074, 3)\n", - "(731, 3)\n" - ] - } - ], - "source": [ - "subj_exam_nAll_train = get_subj_exam_dm(phm_list_train, nb_phm=None)\n", - "subj_exam_nAll_test = get_subj_exam_dm(phm_list_test, nb_phm=None)\n", - "print subj_exam_nAll_train.shape\n", - "print subj_exam_nAll_test.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "fea_30_k2_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .3, k=2, nb_phm=None)\n", - "fea_40_k2_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .4, k=2, nb_phm=None)\n", - "fea_50_k2_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .5, k=2, nb_phm=None)\n", - "fea_60_k2_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .6, k=2, nb_phm=None)\n", - "fea_70_k2_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .7, k=2, nb_phm=None)\n", - "fea_80_k2_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .8, k=2, nb_phm=None)\n", - "fea_90_k2_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .9, k=2, nb_phm=None)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "fea_30_k3_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .3, k=3, nb_phm=None)\n", - "fea_40_k3_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .4, k=3, nb_phm=None)\n", - "fea_50_k3_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .5, k=3, nb_phm=None)\n", - "fea_60_k3_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .6, k=3, nb_phm=None)\n", - "fea_70_k3_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .7, k=3, nb_phm=None)\n", - "fea_80_k3_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .8, k=3, nb_phm=None)\n", - "fea_90_k3_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .9, k=3, nb_phm=None)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "fea_30_k4_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .3, k=4, nb_phm=None)\n", - "fea_40_k4_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .4, k=4, nb_phm=None)\n", - "fea_50_k4_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .5, k=4, nb_phm=None)\n", - "fea_60_k4_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .6, k=4, nb_phm=None)\n", - "fea_70_k4_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .7, k=4, nb_phm=None)\n", - "fea_80_k4_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .8, k=4, 
nb_phm=None)\n", - "fea_90_k4_nAll_train, label_train_nAll = get_feature_matrix_dm(phm_list_train, .9, k=4, nb_phm=None)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "fea_30_k2_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .3, k=2, nb_phm=None)\n", - "fea_40_k2_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .4, k=2, nb_phm=None)\n", - "fea_50_k2_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .5, k=2, nb_phm=None)\n", - "fea_60_k2_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .6, k=2, nb_phm=None)\n", - "fea_70_k2_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .7, k=2, nb_phm=None)\n", - "fea_80_k2_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .8, k=2, nb_phm=None)\n", - "fea_90_k2_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .9, k=2, nb_phm=None)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "fea_30_k3_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .3, k=3, nb_phm=None)\n", - "fea_40_k3_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .4, k=3, nb_phm=None)\n", - "fea_50_k3_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .5, k=3, nb_phm=None)\n", - "fea_60_k3_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .6, k=3, nb_phm=None)\n", - "fea_70_k3_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .7, k=3, nb_phm=None)\n", - "fea_80_k3_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .8, k=3, nb_phm=None)\n", - "fea_90_k3_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .9, k=3, nb_phm=None)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "fea_30_k4_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .3, k=4, nb_phm=None)\n", - "fea_40_k4_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .4, k=4, nb_phm=None)\n", - "fea_50_k4_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .5, k=4, nb_phm=None)\n", - "fea_60_k4_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .6, k=4, nb_phm=None)\n", - "fea_70_k4_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .7, k=4, nb_phm=None)\n", - "fea_80_k4_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .8, k=4, nb_phm=None)\n", - "fea_90_k4_nAll_test, label_test_nAll = get_feature_matrix_dm(phm_list_test, .9, k=4, nb_phm=None)" - ] - }, - { - "cell_type": "code", - "execution_count": 96, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(4074, 140)\n", - "(731, 140)\n" - ] - } - ], - "source": [ - "print fea_30_k2_nAll_train.shape\n", - "print fea_30_k2_nAll_test.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "656\n", - "3418\n", - "==========\n", - "125\n", - "606\n" - ] - } - ], - "source": [ - "label_train_nAll = np.array(label_train_nAll)\n", - "print (label_train_nAll==1).sum()\n", - "print (label_train_nAll==0).sum()\n", - "print '='*10\n", - "label_test_nAll = np.array(label_test_nAll)\n", - "print 
(label_test_nAll==1).sum()\n", - "print (label_test_nAll==0).sum()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Build classifiers based on extracted features" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "strategy=mean performs slightly better than median." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## K=2, n=All" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "===== cutoff: 0.3 =====\n", - "K-fold CV: 0.674200549532\n", - "Test: 0.648283828383\n", - "===== cutoff: 0.4 =====\n", - "K-fold CV: 0.666449047589\n", - "Test: 0.623339933993\n", - "===== cutoff: 0.5 =====\n", - "K-fold CV: 0.652538475198\n", - "Test: 0.648264026403\n", - "===== cutoff: 0.6 =====\n", - "K-fold CV: 0.679989538423\n", - "Test: 0.654633663366\n", - "===== cutoff: 0.7 =====\n", - "K-fold CV: 0.658347898526\n", - "Test: 0.645419141914\n", - "===== cutoff: 0.8 =====\n", - "K-fold CV: 0.650611238028\n", - "Test: 0.62502310231\n", - "===== cutoff: 0.9 =====\n", - "K-fold CV: 0.653929458999\n", - "Test: 0.604580858086\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.preprocessing import Imputer\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.pipeline import Pipeline\n", - "\n", - "imp = Imputer(missing_values='NaN', strategy='mean')\n", - "rf = RandomForestClassifier(class_weight='balanced', random_state=12345, n_jobs=-1)\n", - "imp_rf = Pipeline([('imp', imp), ('clf', rf)])\n", - "\n", - "rf_grid_param1 = {\n", - " 'clf__n_estimators':[300],\n", - "}\n", - "rf_gsearch1 = GridSearchCV(\n", - " estimator=imp_rf,\n", - " param_grid=rf_grid_param1,\n", - " scoring='roc_auc', n_jobs=1, cv=5,\n", - ")\n", - "\n", - "fea_dat_list = [\n", - " {'name': 'cutoff: 0.3', 'train': fea_30_k2_nAll_train, 'test': fea_30_k2_nAll_test},\n", - " {'name': 'cutoff: 0.4', 'train': fea_40_k2_nAll_train, 'test': fea_40_k2_nAll_test},\n", - " {'name': 'cutoff: 0.5', 'train': fea_50_k2_nAll_train, 'test': fea_50_k2_nAll_test},\n", - " {'name': 'cutoff: 0.6', 'train': fea_60_k2_nAll_train, 'test': fea_60_k2_nAll_test},\n", - " {'name': 'cutoff: 0.7', 'train': fea_70_k2_nAll_train, 'test': fea_70_k2_nAll_test},\n", - " {'name': 'cutoff: 0.8', 'train': fea_80_k2_nAll_train, 'test': fea_80_k2_nAll_test},\n", - " {'name': 'cutoff: 0.9', 'train': fea_90_k2_nAll_train, 'test': fea_90_k2_nAll_test},\n", - "]\n", - "\n", - "for d in fea_dat_list:\n", - " rf_gsearch1.fit(d['train'], label_train_nAll)\n", - " print '='*5, d['name'], '='*5\n", - " print 'K-fold CV:', rf_gsearch1.best_score_\n", - " print 'Test:', roc_auc_score(\n", - " label_test_nAll, rf_gsearch1.predict_proba(d['test'])[:,1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## K=3, n=All" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "===== cutoff: 0.3 =====\n", - "K-fold CV: 0.677018188311\n", - "Test: 0.633504950495\n", - "===== cutoff: 0.4 =====\n", - "K-fold CV: 0.669807868119\n", - "Test: 0.659907590759\n", - "===== cutoff: 0.5 =====\n", - "K-fold CV: 0.656115561853\n", - "Test: 0.664528052805\n", - "===== cutoff: 0.6 =====\n", - "K-fold CV: 0.671763581168\n", - "Test: 
0.645518151815\n", - "===== cutoff: 0.7 =====\n", - "K-fold CV: 0.671016931942\n", - "Test: 0.664442244224\n", - "===== cutoff: 0.8 =====\n", - "K-fold CV: 0.659887080201\n", - "Test: 0.615663366337\n", - "===== cutoff: 0.9 =====\n", - "K-fold CV: 0.657753432147\n", - "Test: 0.622594059406\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.preprocessing import Imputer\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.pipeline import Pipeline\n", - "\n", - "imp = Imputer(missing_values='NaN', strategy='mean')\n", - "rf = RandomForestClassifier(class_weight='balanced', random_state=12345, n_jobs=-1)\n", - "imp_rf = Pipeline([('imp', imp), ('clf', rf)])\n", - "\n", - "rf_grid_param1 = {\n", - " 'clf__n_estimators':[300],\n", - "}\n", - "rf_gsearch1 = GridSearchCV(\n", - " estimator=imp_rf,\n", - " param_grid=rf_grid_param1,\n", - " scoring='roc_auc', n_jobs=1, cv=5,\n", - ")\n", - "\n", - "fea_dat_list = [\n", - " {'name': 'cutoff: 0.3', 'train': fea_30_k3_nAll_train, 'test': fea_30_k3_nAll_test},\n", - " {'name': 'cutoff: 0.4', 'train': fea_40_k3_nAll_train, 'test': fea_40_k3_nAll_test},\n", - " {'name': 'cutoff: 0.5', 'train': fea_50_k3_nAll_train, 'test': fea_50_k3_nAll_test},\n", - " {'name': 'cutoff: 0.6', 'train': fea_60_k3_nAll_train, 'test': fea_60_k3_nAll_test},\n", - " {'name': 'cutoff: 0.7', 'train': fea_70_k3_nAll_train, 'test': fea_70_k3_nAll_test},\n", - " {'name': 'cutoff: 0.8', 'train': fea_80_k3_nAll_train, 'test': fea_80_k3_nAll_test},\n", - " {'name': 'cutoff: 0.9', 'train': fea_90_k3_nAll_train, 'test': fea_90_k3_nAll_test},\n", - "]\n", - "\n", - "for d in fea_dat_list:\n", - " rf_gsearch1.fit(d['train'], label_train_nAll)\n", - " print '='*5, d['name'], '='*5\n", - " print 'K-fold CV:', rf_gsearch1.best_score_\n", - " print 'Test:', roc_auc_score(\n", - " label_test_nAll, rf_gsearch1.predict_proba(d['test'])[:,1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## K=4, n=All" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "===== cutoff: 0.3 =====\n", - "K-fold CV: 0.669798404757\n", - "Test: 0.640396039604\n", - "===== cutoff: 0.4 =====\n", - "K-fold CV: 0.6786617849\n", - "Test: 0.640396039604\n", - "===== cutoff: 0.5 =====\n", - "K-fold CV: 0.655805739571\n", - "Test: 0.647214521452\n", - "===== cutoff: 0.6 =====\n", - "K-fold CV: 0.665231372819\n", - "Test: 0.648627062706\n", - "===== cutoff: 0.7 =====\n", - "K-fold CV: 0.662909022404\n", - "Test: 0.655940594059\n", - "===== cutoff: 0.8 =====\n", - "K-fold CV: 0.660822402026\n", - "Test: 0.631207920792\n", - "===== cutoff: 0.9 =====\n", - "K-fold CV: 0.660981059388\n", - "Test: 0.629940594059\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.preprocessing import Imputer\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.pipeline import Pipeline\n", - "\n", - "imp = Imputer(missing_values='NaN', strategy='mean')\n", - "rf = RandomForestClassifier(class_weight='balanced', random_state=12345, n_jobs=-1)\n", - "imp_rf = Pipeline([('imp', imp), ('clf', rf)])\n", - "\n", - "rf_grid_param1 = {\n", - " 'clf__n_estimators':[300],\n", - "}\n", - "rf_gsearch1 = GridSearchCV(\n", - " estimator=imp_rf,\n", - " 
param_grid=rf_grid_param1,\n", - " scoring='roc_auc', n_jobs=1, cv=5,\n", - ")\n", - "\n", - "fea_dat_list = [\n", - " {'name': 'cutoff: 0.3', 'train': fea_30_k4_nAll_train, 'test': fea_30_k4_nAll_test},\n", - " {'name': 'cutoff: 0.4', 'train': fea_40_k4_nAll_train, 'test': fea_40_k4_nAll_test},\n", - " {'name': 'cutoff: 0.5', 'train': fea_50_k4_nAll_train, 'test': fea_50_k4_nAll_test},\n", - " {'name': 'cutoff: 0.6', 'train': fea_60_k4_nAll_train, 'test': fea_60_k4_nAll_test},\n", - " {'name': 'cutoff: 0.7', 'train': fea_70_k4_nAll_train, 'test': fea_70_k4_nAll_test},\n", - " {'name': 'cutoff: 0.8', 'train': fea_80_k4_nAll_train, 'test': fea_80_k4_nAll_test},\n", - " {'name': 'cutoff: 0.9', 'train': fea_90_k4_nAll_train, 'test': fea_90_k4_nAll_test},\n", - "]\n", - "\n", - "for d in fea_dat_list:\n", - " rf_gsearch1.fit(d['train'], label_train_nAll)\n", - " print '='*5, d['name'], '='*5\n", - " print 'K-fold CV:', rf_gsearch1.best_score_\n", - " print 'Test:', roc_auc_score(\n", - " label_test_nAll, rf_gsearch1.predict_proba(d['test'])[:,1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Feature selection with Boruta" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Nb of top regions=2, Nb of imgs=All" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Prefix each cutoff's feature columns with its own cutoff value.\n", - "fea_30_k2_nAll_train.columns = '30_' + fea_30_k2_nAll_train.columns\n", - "fea_40_k2_nAll_train.columns = '40_' + fea_40_k2_nAll_train.columns\n", - "fea_50_k2_nAll_train.columns = '50_' + fea_50_k2_nAll_train.columns\n", - "fea_60_k2_nAll_train.columns = '60_' + fea_60_k2_nAll_train.columns\n", - "fea_70_k2_nAll_train.columns = '70_' + fea_70_k2_nAll_train.columns\n", - "fea_80_k2_nAll_train.columns = '80_' + fea_80_k2_nAll_train.columns\n", - "fea_90_k2_nAll_train.columns = '90_' + fea_90_k2_nAll_train.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(4074, 980)\n", - "(731, 980)\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "fea_all_k2_nAll_train = pd.concat(\n", - " [fea_30_k2_nAll_train, fea_40_k2_nAll_train, fea_50_k2_nAll_train,\n", - " fea_60_k2_nAll_train, fea_70_k2_nAll_train, fea_80_k2_nAll_train,\n", - " fea_90_k2_nAll_train], axis=1)\n", - "fea_all_k2_nAll_test = pd.concat(\n", - " [fea_30_k2_nAll_test, fea_40_k2_nAll_test, fea_50_k2_nAll_test,\n", - " fea_60_k2_nAll_test, fea_70_k2_nAll_test, fea_80_k2_nAll_test,\n", - " fea_90_k2_nAll_test], axis=1)\n", - "print fea_all_k2_nAll_train.shape\n", - "print fea_all_k2_nAll_test.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "31.304951685\n", - "0.03194382825\n" - ] - } - ], - "source": [ - "print 980**.5\n", - "print 980**.5 / 980" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Nb of selected features: 159\n", - "Nb of weak features: 3\n", - "==========\n", - "Test AUC: 0.670323432343\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.preprocessing import Imputer\n", - "from sklearn.metrics import
roc_auc_score\n", - "from sklearn.pipeline import Pipeline\n", - "from boruta import BorutaPy\n", - "\n", - "imp = Imputer(missing_values='NaN', strategy='mean')\n", - "rf1 = RandomForestClassifier(class_weight='balanced', max_depth=5, random_state=12345, n_jobs=-1)\n", - "feat_selector = BorutaPy(rf1, n_estimators='auto', perc=95, max_iter=200, verbose=0, random_state=12345)\n", - "rf2 = RandomForestClassifier(n_estimators=300, class_weight='balanced', random_state=12345, n_jobs=-1)\n", - "impfeat_clf = Pipeline([('imp', imp), ('feat', feat_selector), ('clf', rf2)])\n", - "\n", - "impfeat_clf.fit(fea_all_k2_nAll_train, label_train_nAll)\n", - "\n", - "learned_feat = impfeat_clf.named_steps['feat']\n", - "print 'Nb of selected features:', learned_feat.support_.sum()\n", - "print 'Nb of weak features:', learned_feat.support_weak_.sum()\n", - "print '='*10\n", - "print 'Test AUC:', roc_auc_score(\n", - " label_test_nAll, impfeat_clf.predict_proba(fea_all_k2_nAll_test)[:,1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Change the max_depth value for Boruta to 3" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Nb of selected features: 294\n", - "Nb of weak features: 16\n", - "==========\n", - "Test AUC: 0.64996039604\n", - "==========\n", - "Time spent: 1378.59113908\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.preprocessing import Imputer\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.pipeline import Pipeline\n", - "from boruta import BorutaPy\n", - "import time\n", - "\n", - "time_begin = time.time()\n", - "imp = Imputer(missing_values='NaN', strategy='mean')\n", - "rf1 = RandomForestClassifier(class_weight='balanced', max_depth=3, random_state=12345, n_jobs=-1)\n", - "feat_selector = BorutaPy(rf1, n_estimators='auto', perc=95, max_iter=200, verbose=0, random_state=12345)\n", - "rf2 = RandomForestClassifier(n_estimators=300, class_weight='balanced', random_state=12345, n_jobs=-1)\n", - "impfeat_clf = Pipeline([('imp', imp), ('feat', feat_selector), ('clf', rf2)])\n", - "\n", - "impfeat_clf.fit(fea_all_k2_nAll_train, label_train_nAll)\n", - "\n", - "learned_feat = impfeat_clf.named_steps['feat']\n", - "print 'Nb of selected features:', learned_feat.support_.sum()\n", - "print 'Nb of weak features:', learned_feat.support_weak_.sum()\n", - "print '='*10\n", - "print 'Test AUC:', roc_auc_score(\n", - " label_test_nAll, impfeat_clf.predict_proba(fea_all_k2_nAll_test)[:,1])\n", - "print '='*10\n", - "print 'Time spent:', time.time() - time_begin" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Setting max_depth to 3 makes the performance worse" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Without feature selection" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Without feat sel, test AUC: 0.656607260726\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.preprocessing import Imputer\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.pipeline import Pipeline\n", - "from boruta 
import BorutaPy\n", - "\n", - "imp = Imputer(missing_values='NaN', strategy='mean')\n", - "rf2 = RandomForestClassifier(n_estimators=300, class_weight='balanced', random_state=12345, n_jobs=-1)\n", - "imp_clf = Pipeline([('imp', imp), ('clf', rf2)])\n", - "\n", - "imp_clf.fit(fea_all_k2_nAll_train, label_train_nAll)\n", - "\n", - "print 'Without feat sel, test AUC:', roc_auc_score(\n", - " label_test_nAll, imp_clf.predict_proba(fea_all_k2_nAll_test)[:,1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Feature selection does not make the performance worse" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Nb of top regions=3, Nb of imgs=All" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Prefix each cutoff's feature columns with its own cutoff value.\n", - "fea_30_k3_nAll_train.columns = '30_' + fea_30_k3_nAll_train.columns\n", - "fea_40_k3_nAll_train.columns = '40_' + fea_40_k3_nAll_train.columns\n", - "fea_50_k3_nAll_train.columns = '50_' + fea_50_k3_nAll_train.columns\n", - "fea_60_k3_nAll_train.columns = '60_' + fea_60_k3_nAll_train.columns\n", - "fea_70_k3_nAll_train.columns = '70_' + fea_70_k3_nAll_train.columns\n", - "fea_80_k3_nAll_train.columns = '80_' + fea_80_k3_nAll_train.columns\n", - "fea_90_k3_nAll_train.columns = '90_' + fea_90_k3_nAll_train.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(4074, 1428)\n", - "(731, 1428)\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "fea_all_k3_nAll_train = pd.concat(\n", - " [fea_30_k3_nAll_train, fea_40_k3_nAll_train, fea_50_k3_nAll_train,\n", - " fea_60_k3_nAll_train, fea_70_k3_nAll_train, fea_80_k3_nAll_train,\n", - " fea_90_k3_nAll_train], axis=1)\n", - "fea_all_k3_nAll_test = pd.concat(\n", - " [fea_30_k3_nAll_test, fea_40_k3_nAll_test, fea_50_k3_nAll_test,\n", - " fea_60_k3_nAll_test, fea_70_k3_nAll_test, fea_80_k3_nAll_test,\n", - " fea_90_k3_nAll_test], axis=1)\n", - "print fea_all_k3_nAll_train.shape\n", - "print fea_all_k3_nAll_test.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Nb of selected features: 188\n", - "Nb of weak features: 2\n", - "==========\n", - "Test AUC: 0.675075907591\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.preprocessing import Imputer\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.pipeline import Pipeline\n", - "from boruta import BorutaPy\n", - "\n", - "imp = Imputer(missing_values='NaN', strategy='mean')\n", - "rf1 = RandomForestClassifier(class_weight='balanced', max_depth=5, random_state=12345, n_jobs=-1)\n", - "feat_selector = BorutaPy(rf1, n_estimators='auto', perc=95, max_iter=100, verbose=0, random_state=12345)\n", - "rf2 = RandomForestClassifier(n_estimators=300, class_weight='balanced', random_state=12345, n_jobs=-1)\n", - "impfeat_clf = Pipeline([('imp', imp), ('feat', feat_selector), ('clf', rf2)])\n", - "\n", - "impfeat_clf.fit(fea_all_k3_nAll_train, label_train_nAll)\n", - "\n", - "learned_feat = impfeat_clf.named_steps['feat']\n", - "print 'Nb of selected features:', learned_feat.support_.sum()\n", - "print 'Nb of weak features:', learned_feat.support_weak_.sum()\n", - "print '='*10\n",
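- "# Note: at predict time the Pipeline re-applies the fitted imputer and the\n", - "# Boruta mask, so the test matrix is reduced to the selected columns\n", - "# automatically before rf2 scores it.\n",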
- "print 'Test AUC:', roc_auc_score(\n", - " label_test_nAll, impfeat_clf.predict_proba(fea_all_k3_nAll_test)[:,1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Try again with more iterations" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Nb of selected features: 190\n", - "Nb of weak features: 7\n", - "==========\n", - "Test AUC: 0.652805280528\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.preprocessing import Imputer\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.pipeline import Pipeline\n", - "from boruta import BorutaPy\n", - "\n", - "imp = Imputer(missing_values='NaN', strategy='mean')\n", - "rf1 = RandomForestClassifier(class_weight='balanced', max_depth=5, random_state=12345, n_jobs=-1)\n", - "feat_selector = BorutaPy(rf1, n_estimators='auto', perc=95, max_iter=200, verbose=0, random_state=12345)\n", - "rf2 = RandomForestClassifier(n_estimators=300, class_weight='balanced', random_state=12345, n_jobs=-1)\n", - "impfeat_clf = Pipeline([('imp', imp), ('feat', feat_selector), ('clf', rf2)])\n", - "\n", - "impfeat_clf.fit(fea_all_k3_nAll_train, label_train_nAll)\n", - "\n", - "learned_feat = impfeat_clf.named_steps['feat']\n", - "print 'Nb of selected features:', learned_feat.support_.sum()\n", - "print 'Nb of weak features:', learned_feat.support_weak_.sum()\n", - "print '='*10\n", - "print 'Test AUC:', roc_auc_score(\n", - " label_test_nAll, impfeat_clf.predict_proba(fea_all_k3_nAll_test)[:,1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Without feature selection" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Without feat sel, test AUC: 0.658396039604\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.preprocessing import Imputer\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.pipeline import Pipeline\n", - "from boruta import BorutaPy\n", - "\n", - "imp = Imputer(missing_values='NaN', strategy='mean')\n", - "rf2 = RandomForestClassifier(n_estimators=300, class_weight='balanced', random_state=12345, n_jobs=-1)\n", - "imp_clf = Pipeline([('imp', imp), ('clf', rf2)])\n", - "\n", - "imp_clf.fit(fea_all_k3_nAll_train, label_train_nAll)\n", - "\n", - "print 'Without feat sel, test AUC:', roc_auc_score(\n", - " label_test_nAll, imp_clf.predict_proba(fea_all_k3_nAll_test)[:,1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Feature selection improves the performance" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Nb of top regions=4, Nb of imgs=All" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "fea_30_k4_nAll_train.columns = '30_' + fea_50_k4_nAll_train.columns\n", - "fea_40_k4_nAll_train.columns = '40_' + fea_50_k4_nAll_train.columns\n", - "fea_50_k4_nAll_train.columns = '50_' + fea_50_k4_nAll_train.columns\n", - "fea_60_k4_nAll_train.columns = '60_' + fea_50_k4_nAll_train.columns\n", - "fea_70_k4_nAll_train.columns = '70_' + 
fea_70_k4_nAll_train.columns\n", - "fea_80_k4_nAll_train.columns = '80_' + fea_50_k4_nAll_train.columns\n", - "fea_90_k4_nAll_train.columns = '90_' + fea_90_k4_nAll_train.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(4074, 1876)\n", - "(731, 1876)\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "fea_all_k4_nAll_train = pd.concat(\n", - " [fea_30_k4_nAll_train, fea_40_k4_nAll_train, fea_50_k4_nAll_train,\n", - " fea_60_k4_nAll_train, fea_70_k4_nAll_train, fea_80_k4_nAll_train,\n", - " fea_90_k4_nAll_train], axis=1)\n", - "fea_all_k4_nAll_test = pd.concat(\n", - " [fea_30_k4_nAll_test, fea_40_k4_nAll_test, fea_50_k4_nAll_test,\n", - " fea_60_k4_nAll_test, fea_70_k4_nAll_test, fea_80_k4_nAll_test,\n", - " fea_90_k4_nAll_test], axis=1)\n", - "print fea_all_k4_nAll_train.shape\n", - "print fea_all_k4_nAll_test.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Nb of selected features: 212\n", - "Nb of weak features: 0\n", - "==========\n", - "Test AUC: 0.649082508251\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.preprocessing import Imputer\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.pipeline import Pipeline\n", - "from boruta import BorutaPy\n", - "\n", - "imp = Imputer(missing_values='NaN', strategy='mean')\n", - "rf1 = RandomForestClassifier(class_weight='balanced', max_depth=5, random_state=12345, n_jobs=-1)\n", - "feat_selector = BorutaPy(rf1, n_estimators='auto', perc=95, max_iter=100, verbose=0, random_state=12345)\n", - "rf2 = RandomForestClassifier(n_estimators=300, class_weight='balanced', random_state=12345, n_jobs=-1)\n", - "impfeat_clf = Pipeline([('imp', imp), ('feat', feat_selector), ('clf', rf2)])\n", - "\n", - "impfeat_clf.fit(fea_all_k4_nAll_train, label_train_nAll)\n", - "\n", - "learned_feat = impfeat_clf.named_steps['feat']\n", - "print 'Nb of selected features:', learned_feat.support_.sum()\n", - "print 'Nb of weak features:', learned_feat.support_weak_.sum()\n", - "print '='*10\n", - "print 'Test AUC:', roc_auc_score(\n", - " label_test_nAll, impfeat_clf.predict_proba(fea_all_k4_nAll_test)[:,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Without feat sel, test AUC: 0.648402640264\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.preprocessing import Imputer\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.pipeline import Pipeline\n", - "from boruta import BorutaPy\n", - "\n", - "imp = Imputer(missing_values='NaN', strategy='mean')\n", - "rf2 = RandomForestClassifier(n_estimators=300, class_weight='balanced', random_state=12345, n_jobs=-1)\n", - "imp_clf = Pipeline([('imp', imp), ('clf', rf2)])\n", - "\n", - "imp_clf.fit(fea_all_k4_nAll_train, label_train_nAll)\n", - "\n", - "print 'Without feat sel, test AUC:', roc_auc_score(\n", - " label_test_nAll, imp_clf.predict_proba(fea_all_k4_nAll_test)[:,1])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": 
true - }, - "source": [ - "# Finetune classifiers for k=3, n=None" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Random forest" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'clf__max_depth': 9, 'clf__min_samples_split': 400}\n", - "==========\n", - "0.679600924503\n", - "==========\n", - "0.682693069307\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.preprocessing import Imputer\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.pipeline import Pipeline\n", - "\n", - "imp = Imputer(missing_values='NaN', strategy='mean')\n", - "rf = RandomForestClassifier(n_estimators=300, class_weight='balanced', \n", - " random_state=12345, n_jobs=-1)\n", - "imp_clf = Pipeline([('imp', imp), ('clf', rf)])\n", - "\n", - "rf_grid_param1 = {\n", - " 'clf__min_samples_split': [2, 100, 200, 300, 400],\n", - " 'clf__max_depth': range(3, 10, 2),\n", - "}\n", - "\n", - "rf_gsearch1 = GridSearchCV(\n", - " estimator=imp_clf,\n", - " param_grid=rf_grid_param1,\n", - " scoring='roc_auc', n_jobs=1, cv=5,\n", - ")\n", - "rf_gsearch1.fit(fea_all_k3_nAll_train, label_train_nAll)\n", - "\n", - "print rf_gsearch1.best_params_\n", - "print '='*10\n", - "print rf_gsearch1.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, rf_gsearch1.predict_proba(fea_all_k3_nAll_test)[:,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'clf__max_depth': 8, 'clf__min_samples_split': 400}\n", - "==========\n", - "0.679817151482\n", - "==========\n", - "0.683102310231\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.preprocessing import Imputer\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.pipeline import Pipeline\n", - "\n", - "imp = Imputer(missing_values='NaN', strategy='mean')\n", - "rf = RandomForestClassifier(n_estimators=300, class_weight='balanced', \n", - " random_state=12345, n_jobs=-1)\n", - "imp_clf = Pipeline([('imp', imp), ('clf', rf)])\n", - "\n", - "rf_grid_param2 = {\n", - " 'clf__min_samples_split': range(400, 600, 50),\n", - " 'clf__max_depth': range(8, 13),\n", - "}\n", - "\n", - "rf_gsearch2 = GridSearchCV(\n", - " estimator=imp_clf,\n", - " param_grid=rf_grid_param2,\n", - " scoring='roc_auc', n_jobs=1, cv=5,\n", - ")\n", - "rf_gsearch2.fit(fea_all_k3_nAll_train, label_train_nAll)\n", - "\n", - "print rf_gsearch2.best_params_\n", - "print '='*10\n", - "print rf_gsearch2.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, rf_gsearch2.predict_proba(fea_all_k3_nAll_test)[:,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'clf__max_depth': 8, 'clf__min_samples_split': 400}\n", - "==========\n", - "0.679817151482\n", - "==========\n", - "0.683102310231\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.preprocessing import Imputer\n", - "from 
sklearn.metrics import roc_auc_score\n", - "from sklearn.pipeline import Pipeline\n", - "\n", - "imp = Imputer(missing_values='NaN', strategy='mean')\n", - "rf = RandomForestClassifier(n_estimators=300, class_weight='balanced', \n", - " random_state=12345, n_jobs=-1)\n", - "imp_clf = Pipeline([('imp', imp), ('clf', rf)])\n", - "\n", - "rf_grid_param3 = {\n", - " 'clf__min_samples_split': range(325, 450, 25),\n", - " 'clf__max_depth': [8],\n", - "}\n", - "\n", - "rf_gsearch3 = GridSearchCV(\n", - " estimator=imp_clf,\n", - " param_grid=rf_grid_param3,\n", - " scoring='roc_auc', n_jobs=1, cv=5,\n", - ")\n", - "rf_gsearch3.fit(fea_all_k3_nAll_train, label_train_nAll)\n", - "\n", - "print rf_gsearch3.best_params_\n", - "print '='*10\n", - "print rf_gsearch3.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, rf_gsearch3.predict_proba(fea_all_k3_nAll_test)[:,1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Extra trees" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'clf__max_depth': 5, 'clf__min_samples_split': 100}\n", - "==========\n", - "0.679187519086\n", - "==========\n", - "0.673795379538\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.ensemble import ExtraTreesClassifier\n", - "from sklearn.preprocessing import Imputer\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.pipeline import Pipeline\n", - "\n", - "imp = Imputer(missing_values='NaN', strategy='mean')\n", - "et = ExtraTreesClassifier(n_estimators=300, class_weight='balanced', \n", - " random_state=12345, n_jobs=-1)\n", - "imp_clf = Pipeline([('imp', imp), ('clf', et)])\n", - "\n", - "et_grid_param1 = {\n", - " 'clf__min_samples_split': [2, 100, 200, 300, 400],\n", - " 'clf__max_depth': range(3, 10, 2),\n", - "}\n", - "\n", - "et_gsearch1 = GridSearchCV(\n", - " estimator=imp_clf,\n", - " param_grid=et_grid_param1,\n", - " scoring='roc_auc', n_jobs=1, cv=5,\n", - ")\n", - "et_gsearch1.fit(fea_all_k3_nAll_train, label_train_nAll)\n", - "\n", - "print et_gsearch1.best_params_\n", - "print '='*10\n", - "print et_gsearch1.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, et_gsearch1.predict_proba(fea_all_k3_nAll_test)[:,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'clf__max_depth': 5, 'clf__min_samples_split': 100}\n", - "==========\n", - "0.679187519086\n", - "==========\n", - "0.673795379538\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.ensemble import ExtraTreesClassifier\n", - "from sklearn.preprocessing import Imputer\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.pipeline import Pipeline\n", - "\n", - "imp = Imputer(missing_values='NaN', strategy='mean')\n", - "et = ExtraTreesClassifier(n_estimators=300, class_weight='balanced', \n", - " random_state=12345, n_jobs=-1)\n", - "imp_clf = Pipeline([('imp', imp), ('clf', et)])\n", - "\n", - "et_grid_param2 = {\n", - " 'clf__min_samples_split': range(25, 200, 25),\n", - " 'clf__max_depth': range(4, 7),\n", - "}\n", - "\n", - "et_gsearch2 = GridSearchCV(\n", - " estimator=imp_clf,\n", - " param_grid=et_grid_param2,\n", - " scoring='roc_auc', n_jobs=1, 
cv=5,\n", - ")\n", - "et_gsearch2.fit(fea_all_k3_nAll_train, label_train_nAll)\n", - "\n", - "print et_gsearch2.best_params_\n", - "print '='*10\n", - "print et_gsearch2.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, et_gsearch2.predict_proba(fea_all_k3_nAll_test)[:,1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Gradient boosting trees" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "656\n", - "3418\n", - "5\n" - ] - } - ], - "source": [ - "print (label_train_nAll==1).sum()\n", - "print (label_train_nAll==0).sum()\n", - "print (label_train_nAll==0).sum() / (label_train_nAll==1).sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'n_estimators': 40}\n", - "==========\n", - "0.677631125182\n", - "==========\n", - "0.686165016502\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.metrics import roc_auc_score\n", - "from xgboost import XGBClassifier\n", - "\n", - "bst_grid_param1 = {\n", - " 'n_estimators': range(20, 200, 20)\n", - "}\n", - "bst_gsearch1 = GridSearchCV(\n", - " estimator=XGBClassifier(\n", - " max_depth=5,\n", - " min_child_weight=100,\n", - " learning_rate=.1,\n", - " subsample=.8,\n", - " colsample_bytree=.05,\n", - " scale_pos_weight=5.,\n", - " seed=12345,\n", - " ),\n", - " param_grid=bst_grid_param1,\n", - " scoring='roc_auc', n_jobs=-1, cv=5, \n", - ")\n", - "bst_gsearch1.fit(fea_all_k3_nAll_train.values, label_train_nAll)\n", - "\n", - "print bst_gsearch1.best_params_\n", - "print '='*10\n", - "print bst_gsearch1.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, bst_gsearch1.predict_proba(fea_all_k3_nAll_test.values)[:,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'n_estimators': 50}\n", - "==========\n", - "0.680051339099\n", - "==========\n", - "0.68102970297\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.metrics import roc_auc_score\n", - "from xgboost import XGBClassifier\n", - "\n", - "bst_grid_param2 = {\n", - " 'n_estimators': range(25, 60, 5)\n", - "}\n", - "bst_gsearch2 = GridSearchCV(\n", - " estimator=XGBClassifier(\n", - " max_depth=5,\n", - " min_child_weight=100,\n", - " learning_rate=.1,\n", - " subsample=.8,\n", - " colsample_bytree=.05,\n", - " scale_pos_weight=5.,\n", - " seed=12345,\n", - " ),\n", - " param_grid=bst_grid_param2,\n", - " scoring='roc_auc', n_jobs=-1, cv=5, \n", - ")\n", - "bst_gsearch2.fit(fea_all_k3_nAll_train.values, label_train_nAll)\n", - "\n", - "print bst_gsearch2.best_params_\n", - "print '='*10\n", - "print bst_gsearch2.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, bst_gsearch2.predict_proba(fea_all_k3_nAll_test.values)[:,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'n_estimators': 52}\n", - "==========\n", - "0.680638160971\n", - "==========\n", - "0.680224422442\n" - ] - } - ], - "source": [ - "from 
sklearn.model_selection import GridSearchCV\n", - "from sklearn.metrics import roc_auc_score\n", - "from xgboost import XGBClassifier\n", - "\n", - "bst_grid_param3 = {\n", - " 'n_estimators': range(46, 55)\n", - "}\n", - "bst_gsearch3 = GridSearchCV(\n", - " estimator=XGBClassifier(\n", - " max_depth=5,\n", - " min_child_weight=100,\n", - " learning_rate=.1,\n", - " subsample=.8,\n", - " colsample_bytree=.05,\n", - " scale_pos_weight=5.,\n", - " seed=12345,\n", - " ),\n", - " param_grid=bst_grid_param3,\n", - " scoring='roc_auc', n_jobs=-1, cv=5, \n", - ")\n", - "bst_gsearch3.fit(fea_all_k3_nAll_train.values, label_train_nAll)\n", - "\n", - "print bst_gsearch3.best_params_\n", - "print '='*10\n", - "print bst_gsearch3.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, bst_gsearch3.predict_proba(fea_all_k3_nAll_test.values)[:,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'n_estimators': 40, 'max_depth': 1, 'min_child_weight': 50}\n", - "==========\n", - "0.68502671217\n", - "==========\n", - "0.683386138614\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.metrics import roc_auc_score\n", - "from xgboost import XGBClassifier\n", - "\n", - "bst_grid_param5 = {\n", - " 'n_estimators': [40],\n", - " 'max_depth': range(1, 10, 2),\n", - " 'min_child_weight': range(50, 200, 50), \n", - "}\n", - "bst_gsearch5 = GridSearchCV(\n", - " estimator=XGBClassifier(\n", - " learning_rate=.1,\n", - " subsample=.8,\n", - " colsample_bytree=.05,\n", - " scale_pos_weight=5.,\n", - " seed=12345,\n", - " ),\n", - " param_grid=bst_grid_param5,\n", - " scoring='roc_auc', n_jobs=-1, cv=5, \n", - ")\n", - "bst_gsearch5.fit(fea_all_k3_nAll_train.values, label_train_nAll)\n", - "\n", - "print bst_gsearch5.best_params_\n", - "print '='*10\n", - "print bst_gsearch5.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, bst_gsearch5.predict_proba(fea_all_k3_nAll_test.values)[:,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'n_estimators': 40, 'max_depth': 1, 'min_child_weight': 40}\n", - "==========\n", - "0.685216507107\n", - "==========\n", - "0.683386138614\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.metrics import roc_auc_score\n", - "from xgboost import XGBClassifier\n", - "\n", - "bst_grid_param6 = {\n", - " 'n_estimators': [40],\n", - " 'max_depth': [1],\n", - " 'min_child_weight': range(10, 100, 10),\n", - "}\n", - "bst_gsearch6 = GridSearchCV(\n", - " estimator=XGBClassifier(\n", - " learning_rate=.1,\n", - " subsample=.8,\n", - " colsample_bytree=.05,\n", - " scale_pos_weight=5.,\n", - " seed=12345,\n", - " ),\n", - " param_grid=bst_grid_param6,\n", - " scoring='roc_auc', n_jobs=-1, cv=5, \n", - ")\n", - "bst_gsearch6.fit(fea_all_k3_nAll_train.values, label_train_nAll)\n", - "\n", - "print bst_gsearch6.best_params_\n", - "print '='*10\n", - "print bst_gsearch6.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, bst_gsearch6.predict_proba(fea_all_k3_nAll_test.values)[:,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "{'n_estimators': 40, 'colsample_bytree': 0.080000000000000002, 'max_depth': 1, 'min_child_weight': 40}\n", - "==========\n", - "0.687216736009\n", - "==========\n", - "0.690158415842\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.metrics import roc_auc_score\n", - "from xgboost import XGBClassifier\n", - "\n", - "bst_grid_param7 = {\n", - " 'n_estimators': [40],\n", - " 'max_depth': [1],\n", - " 'min_child_weight': [40],\n", - " 'colsample_bytree': np.arange(.01, .10, .01),\n", - "}\n", - "bst_gsearch7 = GridSearchCV(\n", - " estimator=XGBClassifier(\n", - " learning_rate=.1,\n", - " subsample=.8,\n", - " scale_pos_weight=5.,\n", - " seed=12345,\n", - " ),\n", - " param_grid=bst_grid_param7,\n", - " scoring='roc_auc', n_jobs=-1, cv=5, \n", - ")\n", - "bst_gsearch7.fit(fea_all_k3_nAll_train.values, label_train_nAll)\n", - "\n", - "print bst_gsearch7.best_params_\n", - "print '='*10\n", - "print bst_gsearch7.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, bst_gsearch7.predict_proba(fea_all_k3_nAll_test.values)[:,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'n_estimators': 40, 'subsample': 0.80000000000000027, 'colsample_bytree': 0.08, 'max_depth': 1, 'min_child_weight': 40}\n", - "==========\n", - "0.687216736009\n", - "==========\n", - "0.690158415842\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.metrics import roc_auc_score\n", - "from xgboost import XGBClassifier\n", - "\n", - "bst_grid_param8 = {\n", - " 'n_estimators': [40],\n", - " 'max_depth': [1],\n", - " 'min_child_weight': [40],\n", - " 'colsample_bytree': [.08],\n", - " 'subsample': np.arange(.5, .95, .05),\n", - "}\n", - "bst_gsearch8 = GridSearchCV(\n", - " estimator=XGBClassifier(\n", - " learning_rate=.1,\n", - " scale_pos_weight=5.,\n", - " seed=12345,\n", - " ),\n", - " param_grid=bst_grid_param8,\n", - " scoring='roc_auc', n_jobs=-1, cv=5, \n", - ")\n", - "bst_gsearch8.fit(fea_all_k3_nAll_train.values, label_train_nAll)\n", - "\n", - "print bst_gsearch8.best_params_\n", - "print '='*10\n", - "print bst_gsearch8.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, bst_gsearch8.predict_proba(fea_all_k3_nAll_test.values)[:,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'n_estimators': 400, 'subsample': 0.8, 'colsample_bytree': 0.08, 'max_depth': 1, 'min_child_weight': 40}\n", - "==========\n", - "0.685574672974\n", - "==========\n", - "0.685900990099\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.metrics import roc_auc_score\n", - "from xgboost import XGBClassifier\n", - "\n", - "bst_grid_param9 = {\n", - " 'n_estimators': [400],\n", - " 'max_depth': [1],\n", - " 'min_child_weight': [40],\n", - " 'colsample_bytree': [.08],\n", - " 'subsample': [.8],\n", - "}\n", - "bst_gsearch9 = GridSearchCV(\n", - " estimator=XGBClassifier(\n", - " learning_rate=.01,\n", - " scale_pos_weight=5.,\n", - " seed=12345,\n", - " ),\n", - " param_grid=bst_grid_param9,\n", - " scoring='roc_auc', n_jobs=-1, cv=5, \n", - ")\n", - "bst_gsearch9.fit(fea_all_k3_nAll_train.values, label_train_nAll)\n", 
- "\n", - "print bst_gsearch9.best_params_\n", - "print '='*10\n", - "print bst_gsearch9.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, bst_gsearch9.predict_proba(fea_all_k3_nAll_test.values)[:,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'n_estimators': 4000, 'subsample': 0.8, 'colsample_bytree': 0.08, 'max_depth': 1, 'min_child_weight': 40}\n", - "==========\n", - "0.685614727629\n", - "==========\n", - "0.685504950495\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.metrics import roc_auc_score\n", - "from xgboost import XGBClassifier\n", - "\n", - "bst_grid_param10 = {\n", - " 'n_estimators': [4000],\n", - " 'max_depth': [1],\n", - " 'min_child_weight': [40],\n", - " 'colsample_bytree': [.08],\n", - " 'subsample': [.8],\n", - "}\n", - "bst_gsearch10 = GridSearchCV(\n", - " estimator=XGBClassifier(\n", - " learning_rate=.001,\n", - " scale_pos_weight=5.,\n", - " seed=12345,\n", - " ),\n", - " param_grid=bst_grid_param10,\n", - " scoring='roc_auc', n_jobs=-1, cv=5, \n", - ")\n", - "bst_gsearch10.fit(fea_all_k3_nAll_train.values, label_train_nAll)\n", - "\n", - "print bst_gsearch10.best_params_\n", - "print '='*10\n", - "print bst_gsearch10.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, bst_gsearch10.predict_proba(fea_all_k3_nAll_test.values)[:,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 150, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'colsample_bytree': 0.08, 'scale_pos_weight': 5, 'min_child_weight': 40, 'n_estimators': 40, 'subsample': 0.8, 'max_depth': 1}\n", - "==========\n", - "0.687216736009\n", - "==========\n", - "0.690158415842\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.metrics import roc_auc_score\n", - "from xgboost import XGBClassifier\n", - "\n", - "bst_grid_param11 = {\n", - " 'n_estimators': [40],\n", - " 'max_depth': [1],\n", - " 'min_child_weight': [40],\n", - " 'colsample_bytree': [.08],\n", - " 'subsample': [.8],\n", - " 'scale_pos_weight': range(1, 20, 4)\n", - "}\n", - "bst_gsearch11 = GridSearchCV(\n", - " estimator=XGBClassifier(\n", - " learning_rate=.1,\n", - " seed=12345,\n", - " ),\n", - " param_grid=bst_grid_param11,\n", - " scoring='roc_auc', n_jobs=-1, cv=5, \n", - ")\n", - "bst_gsearch11.fit(fea_all_k3_nAll_train.values, label_train_nAll)\n", - "\n", - "print bst_gsearch11.best_params_\n", - "print '='*10\n", - "print bst_gsearch11.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, bst_gsearch11.predict_proba(fea_all_k3_nAll_test.values)[:,1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Blending different classifiers on the test set" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(731, 3)\n" - ] - } - ], - "source": [ - "rf_test_pred = rf_gsearch3.predict_proba(fea_all_k3_nAll_test)[:,1]\n", - "et_test_pred = et_gsearch2.predict_proba(fea_all_k3_nAll_test)[:,1]\n", - "bst_test_pred = bst_gsearch8.predict_proba(fea_all_k3_nAll_test.values)[:,1]\n", - "all_clf_test_pred = np.stack([rf_test_pred, et_test_pred, bst_test_pred], 
axis=1)\n", - "print all_clf_test_pred.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'C': 1.0000000000000001e-05}\n", - "==========\n", - "0.689927882642\n", - "==========\n", - "0.313636963696\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.svm import SVC\n", - "\n", - "svm_blend_grid_param1 = {\n", - " 'C': 10.**np.arange(-5, 6, 1),\n", - "}\n", - "svm_blend_gsearch1 = GridSearchCV(\n", - " estimator=SVC(\n", - " kernel='linear',\n", - " probability=True,\n", - " class_weight='balanced',\n", - " random_state=12345,\n", - " ),\n", - " param_grid=svm_blend_grid_param1,\n", - " scoring='roc_auc', n_jobs=-1, cv=5, \n", - ")\n", - "svm_blend_gsearch1.fit(all_clf_test_pred, label_test_nAll)\n", - "\n", - "print svm_blend_gsearch1.best_params_\n", - "print '='*10\n", - "print svm_blend_gsearch1.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, svm_blend_gsearch1.predict_proba(all_clf_test_pred)[:,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'C': 1.0000000000000001e-05}\n", - "==========\n", - "0.689927882642\n", - "==========\n", - "0.686363036304\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.linear_model import LogisticRegression\n", - "\n", - "log_blend_grid_param1 = {\n", - " 'C': 10.**np.arange(-5, 6, 1),\n", - "}\n", - "log_blend_gsearch1 = GridSearchCV(\n", - " estimator=LogisticRegression(\n", - " class_weight='balanced',\n", - " random_state=12345,\n", - " max_iter=1000,\n", - " ),\n", - " param_grid=log_blend_grid_param1,\n", - " scoring='roc_auc', n_jobs=-1, cv=5, \n", - ")\n", - "log_blend_gsearch1.fit(all_clf_test_pred, label_test_nAll)\n", - "\n", - "print log_blend_gsearch1.best_params_\n", - "print '='*10\n", - "print log_blend_gsearch1.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, log_blend_gsearch1.predict_proba(all_clf_test_pred)[:,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'C': 1e-10}\n", - "==========\n", - "0.689927882642\n", - "==========\n", - "0.686363036304\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.linear_model import LogisticRegression\n", - "\n", - "log_blend_grid_param2 = {\n", - " 'C': 10.**np.arange(-10, -4, 1),\n", - "}\n", - "log_blend_gsearch2 = GridSearchCV(\n", - " estimator=LogisticRegression(\n", - " class_weight='balanced',\n", - " random_state=12345,\n", - " max_iter=1000,\n", - " ),\n", - " param_grid=log_blend_grid_param2,\n", - " scoring='roc_auc', n_jobs=-1, cv=5, \n", - ")\n", - "log_blend_gsearch2.fit(all_clf_test_pred, label_test_nAll)\n", - "\n", - "print log_blend_gsearch2.best_params_\n", - "print '='*10\n", - "print log_blend_gsearch2.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, log_blend_gsearch2.predict_proba(all_clf_test_pred)[:,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 
70, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{}\n", - "==========\n", - "0.621087563347\n", - "==========\n", - "1.0\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "\n", - "rf_blend_grid_param1 = {}\n", - "rf_blend_gsearch1 = GridSearchCV(\n", - " estimator=RandomForestClassifier(\n", - " n_estimators=100, \n", - " class_weight='balanced', \n", - " random_state=12345, n_jobs=-1\n", - " ),\n", - " param_grid=rf_blend_grid_param1,\n", - " scoring='roc_auc', n_jobs=-1, cv=5, \n", - ")\n", - "rf_blend_gsearch1.fit(all_clf_test_pred, label_test_nAll)\n", - "\n", - "print rf_blend_gsearch1.best_params_\n", - "print '='*10\n", - "print rf_blend_gsearch1.best_score_\n", - "print '='*10\n", - "print roc_auc_score(\n", - " label_test_nAll, rf_blend_gsearch1.predict_proba(all_clf_test_pred)[:,1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Simply taking an average." - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.6859669967\n" - ] - } - ], - "source": [ - "print roc_auc_score(label_test_nAll, all_clf_test_pred.mean(axis=1))" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[ 1. , 0.95807982, 0.9613236 ],\n", - " [ 0.95807982, 1. , 0.90436152],\n", - " [ 0.9613236 , 0.90436152, 1. ]])" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.corrcoef(all_clf_test_pred.T)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The classifiers are highly correlated. Using model blending does not offer any advantage." 
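Why averaging gains so little here can be made concrete: for n predictors with a common variance sigma^2 and a common pairwise correlation rho, the blended mean has variance sigma^2 * (1/n + (1 - 1/n) * rho), which stays close to sigma^2 as rho approaches 1. A minimal sketch with synthetic, equally correlated scores (illustrative only, not the challenge predictions):

import numpy as np

rng = np.random.RandomState(12345)
signal = rng.randn(2000)                      # shared component
preds = np.stack([signal + .3 * rng.randn(2000) for _ in range(3)], axis=1)

corr = np.corrcoef(preds.T)
rho = corr[np.triu_indices_from(corr, k=1)].mean()   # ~0.92 by construction
var_single = preds[:, 0].var()
var_blend = preds.mean(axis=1).var()
print rho
print var_single, var_blend                   # the blend variance barely drops
print var_single * (1. / 3 + (2. / 3) * rho)  # agrees with the formula above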
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Finally selected classifier" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Test AUC: 0.690158415842\n" - ] - } - ], - "source": [ - "from xgboost import XGBClassifier\n", - "from sklearn.metrics import roc_auc_score\n", - "\n", - "bst_selected = XGBClassifier(\n", - " n_estimators=40,\n", - " max_depth=1,\n", - " min_child_weight=40,\n", - " colsample_bytree=.08,\n", - " subsample=.8,\n", - " learning_rate=.1,\n", - " scale_pos_weight=5.,\n", - " seed=12345,\n", - ")\n", - "\n", - "bst_selected.fit(fea_all_k3_nAll_train.values, label_train_nAll)\n", - "print 'Test AUC:', roc_auc_score(\n", - " label_test_nAll, bst_selected.predict_proba(fea_all_k3_nAll_test.values)[:,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Self test AUC: 0.7116949392\n" - ] - } - ], - "source": [ - "from xgboost import XGBClassifier\n", - "\n", - "bst_selected_traintest = XGBClassifier(\n", - " n_estimators=40,\n", - " max_depth=1,\n", - " min_child_weight=40,\n", - " colsample_bytree=.08,\n", - " subsample=.8,\n", - " learning_rate=.1,\n", - " scale_pos_weight=5.,\n", - " seed=12345,\n", - ")\n", - "\n", - "fea_all_k3_nAll_train.columns = fea_all_k3_nAll_test.columns\n", - "fea_all_k3_traintest = pd.concat([fea_all_k3_nAll_train, fea_all_k3_nAll_test], axis=0)\n", - "label_traintest = np.append(label_train_nAll, label_test_nAll)\n", - "bst_selected_traintest.fit(fea_all_k3_traintest.values, label_traintest)\n", - "print 'Self test AUC:', roc_auc_score(\n", - " label_traintest, bst_selected_traintest.predict_proba(fea_all_k3_traintest.values)[:,1])" - ] - }, - { - "cell_type": "code", - "execution_count": 90, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(4805, 1428)\n" - ] - } - ], - "source": [ - "print fea_all_k3_traintest.shape" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": true - }, - "source": [ - "# Save classifier info to external file" - ] - }, - { - "cell_type": "code", - "execution_count": 91, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "feature_name = fea_30_k3_nAll_test.columns # 30 is arbitrary.\n", - "nb_img = None\n", - "cutoffs = np.arange(.3, 1., .1)\n", - "k=3\n", - "clf_list = [bst_selected_traintest]" - ] - }, - { - "cell_type": "code", - "execution_count": 92, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Index([u'cc_ben_global_max_intensity', u'cc_ben_nb_regions',\n", - " u'cc_ben_top1_area', u'cc_ben_top1_area_ratio',\n", - " u'cc_ben_top1_area_ratio2', u'cc_ben_top1_eccentricity',\n", - " u'cc_ben_top1_eig1', u'cc_ben_top1_eig2',\n", - " u'cc_ben_top1_equivalent_diameter', u'cc_ben_top1_euler_number',\n", - " ...\n", - " u'mlo_mal_top3_euler_number', u'mlo_mal_top3_extent',\n", - " u'mlo_mal_top3_major_axis_length', u'mlo_mal_top3_max_intensity',\n", - " u'mlo_mal_top3_mean_intensity', u'mlo_mal_top3_minor_axis_length',\n", - " u'mlo_mal_top3_orientation', u'mlo_mal_top3_perimeter',\n", - " u'mlo_mal_top3_solidity', u'mlo_mal_total_area'],\n", - " dtype='object', length=204)\n", - "==========\n", - "[ 0.3 0.4 0.5 0.6 0.7 0.8 0.9]\n" - 
] - } - ], - "source": [ - "print feature_name\n", - "print '='*10\n", - "print cutoffs" - ] - }, - { - "cell_type": "code", - "execution_count": 93, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "import pickle\n", - "pickle.dump((feature_name, nb_img, cutoffs, k, clf_list), open('model5_ftu_clf_info.pkl', 'w'))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/training/roi_clf_train.py b/training/roi_clf_train.py deleted file mode 100644 index 33363d6..0000000 --- a/training/roi_clf_train.py +++ /dev/null @@ -1,223 +0,0 @@ -import os, argparse -import numpy as np -from keras.preprocessing.image import ImageDataGenerator -from keras.optimizers import SGD -from keras.callbacks import ( - ReduceLROnPlateau, - EarlyStopping, -) -from keras.models import load_model -from dm_keras_ext import DMMetrics, DMAucModelCheckpoint -from dm_resnet import ResNetBuilder - -import warnings -import exceptions -warnings.filterwarnings('ignore', category=exceptions.UserWarning) - - -def resize_img_dat(img_dat, img_size): - '''Resize a train or test image ndarray dataset - ''' - import cv2 - - if img_dat.shape[1:3] != tuple(img_size): - resized_dat = np.zeros( - (img_dat.shape[0],) + tuple(img_size) + (img_dat.shape[3],) ) - for i,img in enumerate(img_dat): - img_ = cv2.resize( - img, dsize=(img_size[1], img_size[0]), - interpolation=cv2.INTER_CUBIC) - resized_dat[i] = img_.reshape(img_.shape + (img.shape[2],)) - return resized_dat - else: - return img_dat - - -def run(x_train_fn, x_test_fn, y_train_fn, y_test_fn, - img_size=[256, 256], do_featurewise_norm=True, - rotation_range=0, width_shift_range=.0, height_shift_range=.0, - zoom_range=[1.0, 1.0], horizontal_flip=False, vertical_flip=False, - batch_size=32, nb_epoch=100, pos_cls_weight=1.0, - nb_init_filter=32, init_filter_size=5, init_conv_stride=2, - pool_size=2, pool_stride=2, - weight_decay=.0001, alpha=.0001, l1_ratio=.0, - inp_dropout=.0, hidden_dropout=.0, init_lr=.01, - lr_patience=20, es_patience=40, - resume_from=None, - best_model='./modelState/roi_clf.h5', - final_model="NOSAVE"): - '''Train a deep learning model for ROI classifications - ''' - - # =========== Load training data =============== # - X_train = np.load(x_train_fn) - X_test = np.load(x_test_fn) - X_train = resize_img_dat(X_train, img_size) - X_test = resize_img_dat(X_test, img_size) - y_train = np.load(y_train_fn) - y_test = np.load(y_test_fn) - - # ============ Train & validation set =============== # - if do_featurewise_norm: - imgen = ImageDataGenerator( - featurewise_center=True, - featurewise_std_normalization=True) - imgen.fit(X_train) - else: - imgen = ImageDataGenerator( - samplewise_center=True, - samplewise_std_normalization=True) - imgen.rotation_range = rotation_range - imgen.width_shift_range = width_shift_range - imgen.height_shift_range = height_shift_range - imgen.zoom_range = zoom_range - imgen.horizontal_flip = horizontal_flip - imgen.vertical_flip = vertical_flip - train_generator = imgen.flow(X_train, y_train, batch_size=batch_size, - shuffle=True, seed=12345) - - X_test -= imgen.mean - X_test /= imgen.std - validation_set = (X_test, y_test) - - # 
================= Model training ============== # - nb_worker = int(os.getenv('NUM_CPU_CORES', 4)) - if resume_from is not None: - model = load_model( - resume_from, - custom_objects={ - 'sensitivity': DMMetrics.sensitivity, - 'specificity': DMMetrics.specificity - } - ) - else: - model = ResNetBuilder.build_resnet_50( - (1, img_size[0], img_size[1]), 1, - nb_init_filter, init_filter_size, init_conv_stride, - pool_size, pool_stride, - weight_decay, alpha, l1_ratio, - inp_dropout, hidden_dropout) - sgd = SGD(lr=init_lr, momentum=0.9, decay=0.0, nesterov=True) - model.compile(optimizer=sgd, loss='binary_crossentropy', - metrics=[DMMetrics.sensitivity, DMMetrics.specificity]) - reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, - patience=lr_patience, verbose=1) - early_stopping = EarlyStopping(monitor='val_loss', patience=es_patience, verbose=1) - auc_checkpointer = DMAucModelCheckpoint( - best_model, validation_set, batch_size=batch_size) - hist = model.fit_generator( - train_generator, - samples_per_epoch=len(X_train), - nb_epoch=nb_epoch, - class_weight={ 0: 1.0, 1: pos_cls_weight }, - validation_data=validation_set, - callbacks=[reduce_lr, early_stopping, auc_checkpointer], - nb_worker=nb_worker, - pickle_safe=True, # turn on pickle_safe to avoid a strange error. - verbose=2) - - # Training report. - min_loss_locs, = np.where(hist.history['val_loss'] == min(hist.history['val_loss'])) - best_val_loss = hist.history['val_loss'][min_loss_locs[0]] - best_val_sensitivity = hist.history['val_sensitivity'][min_loss_locs[0]] - best_val_specificity = hist.history['val_specificity'][min_loss_locs[0]] - print "\n==== Training summary ====" - print "Minimum val loss achieved at epoch:", min_loss_locs[0] + 1 - print "Best val loss:", best_val_loss - print "Best val sensitivity:", best_val_sensitivity - print "Best val specificity:", best_val_specificity - - if final_model != "NOSAVE": - model.save(final_model) - - return hist - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description="DM ROI clf training") - parser.add_argument("x_train_fn", type=str) - parser.add_argument("x_test_fn", type=str) - parser.add_argument("y_train_fn", type=str) - parser.add_argument("y_test_fn", type=str) - - parser.add_argument("--img-size", "-is", dest="img_size", nargs=2, type=int, - default=[256, 256]) - parser.add_argument("--featurewise-norm", dest="do_featurewise_norm", action="store_true") - parser.add_argument("--no-featurewise-norm", dest="do_featurewise_norm", action="store_false") - parser.set_defaults(do_featurewise_norm=True) - parser.add_argument("--batch-size", "-bs", dest="batch_size", type=int, default=32) - parser.add_argument("--rotation-range", dest="rotation_range", type=int, default=0) - parser.add_argument("--width-shift-range", dest="width_shift_range", type=float, default=.0) - parser.add_argument("--height-shift-range", dest="height_shift_range", type=float, default=.0) - parser.add_argument("--zoom-range", dest="zoom_range", nargs=2, type=float, default=[1.0, 1.0]) - parser.add_argument("--horizontal-flip", dest="horizontal_flip", action="store_true") - parser.add_argument("--no-horizontal-flip", dest="horizontal_flip", action="store_false") - parser.set_defaults(horizontal_flip=False) - parser.add_argument("--vertical-flip", dest="vertical_flip", action="store_true") - parser.add_argument("--no-vertical-flip", dest="vertical_flip", action="store_false") - parser.set_defaults(vertical_flip=False) - parser.add_argument("--nb-epoch", "-ne", dest="nb_epoch", 
type=int, default=100) - parser.add_argument("--pos-class-weight", "-pcw", dest="pos_cls_weight", type=float, default=1.0) - parser.add_argument("--nb-init-filter", "-nif", dest="nb_init_filter", type=int, default=32) - parser.add_argument("--init-filter-size", "-ifs", dest="init_filter_size", type=int, default=5) - parser.add_argument("--init-conv-stride", "-ics", dest="init_conv_stride", type=int, default=2) - parser.add_argument("--max-pooling-size", "-mps", dest="pool_size", type=int, default=2) - parser.add_argument("--max-pooling-stride", "-mpr", dest="pool_stride", type=int, default=2) - parser.add_argument("--weight-decay", "-wd", dest="weight_decay", type=float, default=.0001) - parser.add_argument("--alpha", dest="alpha", type=float, default=.0001) - parser.add_argument("--l1-ratio", dest="l1_ratio", type=float, default=.0) - parser.add_argument("--inp-dropout", "-id", dest="inp_dropout", type=float, default=.0) - parser.add_argument("--hidden-dropout", "-hd", dest="hidden_dropout", type=float, default=.0) - parser.add_argument("--init-learningrate", "-ilr", dest="init_lr", type=float, default=.01) - parser.add_argument("--lr-patience", "-lrp", dest="lr_patience", type=int, default=20) - parser.add_argument("--es-patience", "-esp", dest="es_patience", type=int, default=40) - parser.add_argument("--resume-from", dest="resume_from", type=str, default=None) - parser.add_argument("--no-resume-from", dest="resume_from", action="store_const", const=None) - parser.add_argument("--best-model", "-bm", dest="best_model", type=str, - default="./modelState/roi_clf.h5") - parser.add_argument("--final-model", "-fm", dest="final_model", type=str, - default="NOSAVE") - - args = parser.parse_args() - run_opts = dict( - img_size=args.img_size, - do_featurewise_norm=args.do_featurewise_norm, - batch_size=args.batch_size, - rotation_range=args.rotation_range, - width_shift_range=args.width_shift_range, - height_shift_range=args.height_shift_range, - zoom_range=args.zoom_range, - horizontal_flip=args.horizontal_flip, - vertical_flip=args.vertical_flip, - nb_epoch=args.nb_epoch, - pos_cls_weight=args.pos_cls_weight, - nb_init_filter=args.nb_init_filter, - init_filter_size=args.init_filter_size, - init_conv_stride=args.init_conv_stride, - pool_size=args.pool_size, - pool_stride=args.pool_stride, - weight_decay=args.weight_decay, - alpha=args.alpha, - l1_ratio=args.l1_ratio, - inp_dropout=args.inp_dropout, - hidden_dropout=args.hidden_dropout, - init_lr=args.init_lr, - lr_patience=args.lr_patience, - es_patience=args.es_patience, - resume_from=args.resume_from, - best_model=args.best_model, - final_model=args.final_model - ) - print "\n>>> Model training options: <<<\n", run_opts, "\n" - run(args.x_train_fn, args.x_test_fn, args.y_train_fn, args.y_test_fn, - **run_opts) - - - - - - - - - diff --git a/training/score_candidROI.sh b/training/score_candidROI.sh deleted file mode 100755 index 2ba25f1..0000000 --- a/training/score_candidROI.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash - -export PYTHONPATH=/:$PYTHONPATH -IMG_CW_TSV="/metadata/images_crosswalk.tsv" -EXAM_TSV="/metadata/exams_metadata.tsv" -IMG_FOLDER="/trainingData" -IMG_EXT="dcm" -# resnet50_candidROI_mulcls_bestAuc_model2.h5 resnet50_candidROI_mulcls_final_model2.h5 -# DL_STATE="/resnet50_candidROI_mulcls_final_model2.h5" -DL_STATE="resnet50_candidROI_ONE_bestAuc_model2.h5" -TRAIN_OUT="/modelState/meta_prob_train_ONE2_best.pkl" -TEST_OUT="/modelState/meta_prob_test_ONE2_best.pkl" - -echo -n "Start training: " && date -echo - -python 
dm_candidROI_score.py \ - --img-extension $IMG_EXT \ - --img-height 1024 \ - --img-scale 4095 \ - --val-size 8000 \ - --neg-vs-pos-ratio 10.0 \ - --featurewise-norm \ - --featurewise-mean 873.6 \ - --featurewise-std 739.3 \ - --img-per-batch 4 \ - --roi-per-img 32 \ - --roi-size 256 256 \ - --low-int-threshold 0.05 \ - --blob-min-area 3 \ - --blob-min-int 0.5 \ - --blob-max-int 0.85 \ - --blob-th-step 10 \ - --img-tsv $IMG_CW_TSV \ - --exam-tsv $EXAM_TSV \ - --train-out $TRAIN_OUT \ - --test-out $TEST_OUT \ - $IMG_FOLDER $DL_STATE - -echo -echo -n "End training: " && date - - - - - - - - - - - - - diff --git a/training/score_candidROI_local.sh b/training/score_candidROI_local.sh deleted file mode 100755 index 454f615..0000000 --- a/training/score_candidROI_local.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# export PYTHONPATH=/:$PYTHONPATH -IMG_CW_TSV="./metadata/images_crosswalk.tsv" -EXAM_TSV="./metadata/exams_metadata.tsv" -IMG_FOLDER="./trainingData" -IMG_EXT="dcm" -DL_STATE="modelState/resnet50_candidROI_mulcls_local_bestAuc_model.h5" -# DL_STATE="modelState/2017-02-21_candidROI_net50_mc/resnet50_candidROI_mulcls_bestAuc_model.h5" -TRAIN_OUT="./modelState/meta_prob_train.pkl" -TEST_OUT="./modelState/meta_prob_test.pkl" - -echo -n "Start training: " && date -echo - -python dm_candidROI_score.py \ - --img-extension $IMG_EXT \ - --img-height 1024 \ - --img-scale 4095 \ - --val-size 0.3 \ - --neg-vs-pos-ratio 10.0 \ - --featurewise-norm \ - --featurewise-mean 873.6 \ - --featurewise-std 739.3 \ - --img-per-batch 4 \ - --roi-per-img 8 \ - --roi-size 256 256 \ - --low-int-threshold 0.05 \ - --blob-min-area 3 \ - --blob-min-int 0.5 \ - --blob-max-int 0.85 \ - --blob-th-step 10 \ - --img-tsv $IMG_CW_TSV \ - --exam-tsv $EXAM_TSV \ - --train-out $TRAIN_OUT \ - --test-out $TEST_OUT \ - $IMG_FOLDER $DL_STATE - -echo -echo -n "End training: " && date - - - - - - - - - - - - - diff --git a/training/score_heatmap.sh b/training/score_heatmap.sh deleted file mode 100755 index 5aacdbb..0000000 --- a/training/score_heatmap.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash - -export PYTHONPATH=/:$PYTHONPATH -IMG_CW_TSV="/metadata/images_crosswalk.tsv" -EXAM_TSV="/metadata/exams_metadata.tsv" -IMG_FOLDER="/trainingData" -IMG_EXT="dcm" -DL_STATE="/3cls_best_model5_finetuned.h5" -OUT="modelState/3cls_best_model5_finetuned_phm_s128_add.pkl" -PREDICTED="/m5_ftu_phm_s128_predicted_subjs.npy" - -echo -n "Start training: " && date -echo - -python dm_heatmap_score.py \ - --img-extension $IMG_EXT \ - --img-height 4096 \ - --img-scale 255.0 \ - --equalize-hist \ - --featurewise-center \ - --featurewise-mean 91.6 \ - --neg-vs-pos-ratio 1.0 \ - --net resnet50 \ - --batch-size 256 \ - --patch-size 256 \ - --stride 128 \ - --img-tsv $IMG_CW_TSV \ - --exam-tsv $EXAM_TSV \ - --out $OUT \ - --predicted-subj-file $PREDICTED \ - --add-subjs 800 \ - $IMG_FOLDER $DL_STATE - -echo -echo -n "End training: " && date - - - - - - - - - - - - - diff --git a/training/score_heatmap_local.sh b/training/score_heatmap_local.sh deleted file mode 100755 index 0d0987e..0000000 --- a/training/score_heatmap_local.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash - -# export PYTHONPATH=/:$PYTHONPATH -IMG_CW_TSV="./metadata/images_crosswalk.tsv" -EXAM_TSV="./metadata/exams_metadata.tsv" -IMG_FOLDER="trainingData" -IMG_EXT="dcm" -#DL_STATE="modelState/2017-04-19_patch_im4096_256/3cls_best_model5.h5" -#DL_STATE="modelState/2017-04-19_patch_im4096_256/3cls_best_model5_pilot_finetuned.h5" 
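Both heatmap scoring scripts call dm_heatmap_score.py with --patch-size 256 --stride 128, i.e. a dense sliding-window scan of each mammogram. A minimal sketch of that scanning pattern (an assumption-laden illustration, not the actual dm_heatmap_score.py logic; `model` stands for any Keras-style classifier with a predict() method, and `cls_idx` for whichever probability column is of interest):

import numpy as np

def heatmap_score(img, model, patch_size=256, stride=128, cls_idx=1):
    '''Slide a patch classifier over one 2-D image; illustrative sketch only.'''
    # Number of patch positions along each axis.
    rows = (img.shape[0] - patch_size) // stride + 1
    cols = (img.shape[1] - patch_size) // stride + 1
    heatmap = np.zeros((rows, cols))
    for i in range(rows):
        for j in range(cols):
            patch = img[i * stride:i * stride + patch_size,
                        j * stride:j * stride + patch_size]
            # Batch of one single-channel patch.
            x = patch[np.newaxis, :, :, np.newaxis].astype('float32')
            heatmap[i, j] = model.predict(x)[0, cls_idx]
    return heatmap

For example, a 4096x3136 image scanned this way yields a 31x23 map of patch probabilities, which downstream code can then summarize into per-breast features.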
-DL_STATE="modelState/2017-04-19_patch_im4096_256/3cls_best_model5_finetuned.h5" -#DL_STATE="modelState/2017-03-31_patch_im1024_96/all_cls_best_model3.h5" -#DL_STATE="modelState/2017-03-16_candidROI_net50_mc5/resnet50_candidROI_mulcls_bestAuc_model5.h5" -#OUT="./modelState/2017-03-31_patch_im1024_96/all_cls_best_model3_phm_s24.pkl" -#OUT="./modelState/2017-04-19_patch_im4096_256/pilot_model5_DMfinetuned_phm_s128.pkl" -OUT="./scratch/add_subj_phm.pkl" -PREDICTED="./scratch/predicted_subjs.npy" -#OUT="modelState/2017-03-16_candidROI_net50_mc5/mc5_best_phm_s6.h5" - -echo -n "Start training: " && date -echo - -#export CUDA_VISIBLE_DEVICES="" - -python dm_heatmap_score.py \ - --img-extension $IMG_EXT \ - --img-height 4096 \ - --img-scale 255.0 \ - --equalize-hist \ - --featurewise-center \ - --featurewise-mean 91.6 \ - --no-neg-vs-pos-ratio \ - --net resnet50 \ - --batch-size 64 \ - --patch-size 256 \ - --stride 128 \ - --img-tsv $IMG_CW_TSV \ - --exam-tsv $EXAM_TSV \ - --out $OUT \ - --predicted-subj-file $PREDICTED \ - --add-subjs 10 \ - $IMG_FOLDER $DL_STATE - -echo -echo -n "End training: " && date - - - - - - - - - - - - - diff --git a/training/tf_example/DREAM_DM_starter_tf.py b/training/tf_example/DREAM_DM_starter_tf.py deleted file mode 100644 index 1426e7b..0000000 --- a/training/tf_example/DREAM_DM_starter_tf.py +++ /dev/null @@ -1,820 +0,0 @@ -from __future__ import print_function -import argparse -import csv -import dicom -import gzip -from gzip import GzipFile -import numpy as np -from os import listdir, remove, mkdir -from os.path import isfile, join, isdir -import scipy.misc -from sklearn.cross_validation import train_test_split -import tensorflow as tf -import tflearn -import sys -import time - - -def super_print(statement, f): - """ - This basically prints everything in statement. - We'll add a new line character for the output file. - We'll just use print for the output. - INPUTS: - - statement: (string) the string to print. - - f: (opened file) this is the output file object to print to - """ - sys.stdout.write(statement + '\n') - sys.stdout.flush() - f.write(statement + '\n') - return 0 - - -def create_test_splits(path_csv_test): - """ - Goes through the data folder and divides for testing. - INPUTS: - - path_csv_test: (string) path to test csv - """ - X_tr = [] - X_te = [] - Y_tr = [] - Y_te = [] - # First, let's map examID and laterality to fileName - dict_X_left = {} - dict_X_right = {} - counter = 0 - with open(path_csv_test, 'r') as file_crosswalk: - reader_crosswalk = csv.reader(file_crosswalk, delimiter='\t') - for row in reader_crosswalk: - if counter == 0: - counter += 1 - continue - if row[3].strip()=='R': - dict_X_right[row[0].strip()] = row[4].strip() - X_te.append((row[0].strip(), 'R', row[4].strip())) - elif row[3].strip()=='L': - dict_X_left[row[0].strip()] = row[4].strip() - X_te.append((row[0].strip(), 'L', row[4].strip())) - #for key_X in set(dict_X_left.keys()) & set(dict_X_right.keys()): - # X_te.append((dict_X_left[key_X], dict_X_right[key_X])) - return X_tr, X_te, Y_tr, Y_te - - -def create_data_splits(path_csv_crosswalk, path_csv_metadata): - """ - Goes through data folder and divides train/val. - INPUTS: - - path_csv_crosswalk: (string) path to first csv file - - path_csv_metadata: (string) path to second csv file - There should be two csv files. The first will relate the filename - to the actual patient ID and L/R side, then the second csv file - will relate this to whether we get the cancer. This is ridiculous. - Very very very bad filesystem. 
Hope this gets better. - """ - # First, let's map the .dcm.gz file to a (patientID, examIndex, imageView) - # tuple. - dict_img_to_patside = {} - counter = 0 - with open(path_csv_crosswalk, 'r') as file_crosswalk: - reader_crosswalk = csv.reader(file_crosswalk, delimiter='\t') - for row in reader_crosswalk: - if counter == 0: - counter += 1 - continue - dict_img_to_patside[row[5].strip()] = ( - row[0].strip(), row[4].strip()) - # Now, let's map the tuple to cancer or non-cancer. - dict_tuple_to_cancer = {} - counter = 0 - with open(path_csv_metadata, 'r') as file_metadata: - reader_metadata = csv.reader(file_metadata, delimiter='\t') - for row in reader_metadata: - if counter == 0: - counter += 1 - continue - try: - dict_tuple_to_cancer[(row[0].strip(), 'L')] = int(row[3]) - dict_tuple_to_cancer[(row[0].strip(), 'R')] = int(row[4]) - except ValueError: - pass - # Alright, now, let's connect those dictionaries together... - X_tot = [] - Y_tot = [] - for img_name in dict_img_to_patside: - if dict_img_to_patside[img_name] in dict_tuple_to_cancer: - X_tot.append(img_name) - Y_tot.append(dict_tuple_to_cancer[dict_img_to_patside[img_name]]) - # Making train/val split and returning. - X_tr, X_te, Y_tr, Y_te = train_test_split(X_tot, Y_tot, test_size=0.2) - return X_tr, X_te, Y_tr, Y_te - - -def create_data_splits2(path_csv_crosswalk, path_csv_metadata, seed, f): - '''A substitute function for create_data_splits using DMMetaManager - Args: - seed (int): an integer to seed the random state for split. - ''' - from meta import DMMetaManager - - meta_man = DMMetaManager(exam_tsv=path_csv_metadata, - img_tsv=path_csv_crosswalk, - img_folder='', - img_extension='dcm') - img_list, lab_list = meta_man.get_flatten_img_list() - X_tr, X_te, Y_tr, Y_te = train_test_split(img_list, lab_list, test_size=0.2, - random_state=seed) - val_n_pos = sum(np.array(Y_te) == 1) - val_n_neg = sum(np.array(Y_te) == 0) - statement = "Validation number of positive cases: %d, negative cases: %d." - statement = statement % (val_n_pos, val_n_neg) - super_print(statement, f) - return X_tr, X_te, Y_tr, Y_te - - -def read_in_one_image(path_img, name_img, matrix_size, data_aug=False): - """ - This is SUPER basic. This can be improved. - Basically, all data is stored as a .dcm.gz. - First, we'll uncompress and save as temp.dcm. - Then we'll read in the dcm to get to the array. - We'll resize the image to [matrix_size, matrix_size]. - We'll also convert to a np.float32 and zero-center 1-scale the data. - INPUTS: - - path_img: (string) path to the data - - name_img: (string) name of the image e.g. '123456.dcm' - - matrix_size: (int) one dimension of the square image e.g. 224 - """ - # Setting up the filepaths and opening up the format. 
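    # Note on the lookup below: the code first tries the exact file name and
    # falls back to appending '.gz'; files whose name contains '.dcm.gz' are
    # wrapped in GzipFile so dicom.read_file() can decode them in memory,
    # which avoids the temp-file round trip used by the commented-out code.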
- #filepath_temp = join(path_img, 'temp.dcm') - filepath_img = join(path_img, name_img) - if not isfile(filepath_img): - filepath_img += '.gz' - if not isfile(filepath_img): - raise Exception - # Reading/uncompressing/writing - # if isfile(filepath_temp): - # remove(filepath_temp) - # with gzip.open(filepath_img, 'rb') as f_gzip: - # file_content = f_gzip.read() - # with open(filepath_temp, 'w') as f_dcm: - # f_dcm.write(file_content) - # Reading in dicom file to ndarray and processing - if '.dcm.gz' in filepath_img: - dicom_content = dicom.read_file(GzipFile(filepath_img)) - else: - dicom_content = dicom.read_file(filepath_img) - img = dicom_content.pixel_array - img = scipy.misc.imresize(img, (matrix_size, matrix_size), interp='cubic') - img = img.astype(np.float32) - img -= np.mean(img) - img /= np.std(img) - # Removing temporary file. - # remove(filepath_temp) - # Let's do some stochastic data augmentation. - if not data_aug: - return img - if np.random.rand() > 0.5: # flip left-right - img = np.fliplr(img) - num_rot = np.random.choice(4) # rotate 90 randomly - img = np.rot90(img, num_rot) - up_bound = np.random.choice(174) # zero out square - right_bound = np.random.choice(174) - img[up_bound:(up_bound + 50), right_bound:(right_bound + 50)] = 0.0 - return img - - -def conv2d(l_input, filt_size, filt_num, stride=1, alpha=0.1, name="conv2d", norm="bn"): - """ - A simple 2-dimensional convolution layer. - Layer Architecture: 2d-convolution - bias-addition - batch_norm - reLU - All weights are created with a (hopefully) unique scope. - INPUTS: - - l_input: (tensor.4d) input of size [batch_size, layer_width, layer_height, channels] - - filt_size: (int) size of the square filter to be made - - filt_num: (int) number of filters to be made - - stride: (int) stride of our convolution - - alpha: (float) for the leaky ReLU. Do 0.0 for ReLU. - - name: (string) unique name for this convolution layer - - norm: (string) to decide which normalization to use ("bn", "lrn", None) - """ - # Creating and Doing the Convolution. - input_size = l_input.get_shape().as_list() - weight_shape = [filt_size, filt_size, input_size[3], filt_num] - std = 0.01 # np.sqrt(2.0 / (filt_size * filt_size * input_size[3])) - with tf.variable_scope(name + "_conv_weights"): - W = tf.get_variable( - "W", weight_shape, initializer=tf.random_normal_initializer(stddev=std)) - tf.add_to_collection("reg_variables", W) - conv_layer = tf.nn.conv2d( - l_input, W, strides=[1, stride, stride, 1], padding='SAME') - # Normalization - if norm == "bn": - norm_layer = tflearn.layers.normalization.batch_normalization( - conv_layer, name=(name + "_batch_norm"), decay=0.9) - elif norm == "lrn": - norm_layer = tflearn.layers.normalization.local_response_normalization( - conv_layer) - # ReLU - relu_layer = tf.maximum(norm_layer, norm_layer * alpha) - return relu_layer - - -def max_pool(l_input, k=2, stride=None): - """ - A simple 2-dimensional max pooling layer. - Strides and size of max pool kernel is constrained to be the same. - INPUTS: - - l_input: (tensor.4d) input of size [batch_size, layer_width, layer_height, channels] - - k: (int) size of the max_filter to be made. also size of stride. - """ - if stride == None: - stride = k - # Doing the Max Pool - max_layer = tf.nn.max_pool(l_input, ksize=[1, k, k, 1], strides=[ - 1, stride, stride, 1], padding='SAME') - return max_layer - - -def incept(l_input, kSize=[16, 16, 16, 16, 16, 16], name="incept", norm="bn"): - """ - So, this is the classical incept layer. 
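    Four branches run in parallel on the same input (a 1x1 convolution, a 1x1
    reduce into a 3x3, a 1x1 reduce into a 5x5, and a 3x3 max-pool into a 1x1
    projection) and their feature maps are concatenated along the channel axis.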
- INPUTS: - - l_input: (tensor.4d) input of size [batch_size, layer_width, layer_height, channels] - - ksize: (array (6,)) [1x1, 3x3reduce, 3x3, 5x5reduce, 5x5, poolproj] - - name: (string) name of incept layer - - norm: (string) to decide which normalization ("bn", "lrn", None) - """ - layer_1x1 = conv2d(l_input, 1, kSize[0], name=(name + "_1x1"), norm=norm) - layer_3x3a = conv2d(l_input, 1, kSize[1], name=(name + "_3x3a"), norm=norm) - layer_3x3b = conv2d(layer_3x3a, 3, kSize[ - 2], name=(name + "_3x3b"), norm=norm) - layer_5x5a = conv2d(l_input, 1, kSize[3], name=(name + "_5x5a"), norm=norm) - layer_5x5b = conv2d(layer_5x5a, 5, kSize[ - 4], name=(name + "_5x5b"), norm=norm) - layer_poola = max_pool(l_input, k=3, stride=1) - layer_poolb = conv2d(layer_poola, 1, kSize[ - 5], name=(name + "_poolb"), norm=norm) - return tf.concat(3, [layer_1x1, layer_3x3b, layer_5x5b, layer_poolb]) - - -def dense(l_input, hidden_size, keep_prob, alpha=0.1, name="dense"): - """ - Dense (Fully Connected) layer. - Architecture: reshape - Affine - batch_norm - dropout - relu - WARNING: should not be the output layer. Use "output" for that. - INPUTS: - - l_input: (tensor.2d or more) basically, of size [batch_size, etc...] - - hidden_size: (int) Number of hidden neurons. - - keep_prob: (float) Probability to keep neuron during dropout layer. - - alpha: (float) Slope for leaky ReLU. Set 0.0 for ReLU. - - name: (string) unique name for layer. - """ - # Flatten Input Layer - input_size = l_input.get_shape().as_list() - reshape_size = 1 - for iter_size in range(1, len(input_size)): - reshape_size *= input_size[iter_size] - reshape_layer = tf.reshape(l_input, [-1, reshape_size]) - # Creating and Doing Affine Transformation - weight_shape = [reshape_layer.get_shape().as_list()[1], hidden_size] - std = 0.01 # np.sqrt(2.0 / reshape_layer.get_shape().as_list()[1]) - with tf.variable_scope(name + "_dense_weights"): - W = tf.get_variable( - "W", weight_shape, initializer=tf.random_normal_initializer(stddev=std)) - tf.add_to_collection("reg_variables", W) - affine_layer = tf.matmul(reshape_layer, W) - # Batch Normalization - norm_layer = tflearn.layers.normalization.batch_normalization( - affine_layer, name=(name + "_batch_norm"), decay=0.9) - # Dropout - dropout_layer = tf.nn.dropout(norm_layer, keep_prob) - # ReLU - relu_layer = tf.maximum(dropout_layer, dropout_layer * alpha) - return relu_layer - - -def output(l_input, output_size, name="output"): - """ - Output layer. Just a simple affine transformation. - INPUTS: - - l_input: (tensor.2d or more) basically, of size [batch_size, etc...] - - output_size: (int) basically, number of classes we're predicting - - name: (string) unique name for layer. 
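    Note that this returns raw logits; the softmax lives downstream, in
    get_CE_loss and in the prob op built by train_net.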
- """ - # Flatten Input Layer - input_size = l_input.get_shape().as_list() - reshape_size = 1 - for iter_size in range(1, len(input_size)): - reshape_size *= input_size[iter_size] - reshape_layer = tf.reshape(l_input, [-1, reshape_size]) - # Creating and Doing Affine Transformation - weight_shape = [reshape_layer.get_shape().as_list()[1], output_size] - std = 0.01 # np.sqrt(2.0 / reshape_layer.get_shape().as_list()[1]) - with tf.variable_scope(name + "_output_weights"): - W = tf.get_variable( - "W", weight_shape, initializer=tf.random_normal_initializer(stddev=std)) - b = tf.get_variable( - "b", output_size, initializer=tf.constant_initializer(0.0)) - tf.add_to_collection("reg_variables", W) - affine_layer = tf.matmul(reshape_layer, W) + b - return affine_layer - - -def get_L2_loss(reg_param, key="reg_variables"): - """ - L2 Loss Layer. Usually will use "reg_variables" collection. - INPUTS: - - reg_param: (float) the lambda value for regularization. - - key: (string) the key for the tf collection to get from. - """ - L2_loss = 0.0 - for W in tf.get_collection(key): - L2_loss += reg_param * tf.nn.l2_loss(W) - return L2_loss - - -def get_CE_loss(logits, labels): - """ - This calculates the cross entropy loss. - Modular function made just because tf program name is long. - INPUTS: - - logits: (tensor.2d) logit probability values. - - labels: (array of ints) basically, label \in {0,...,L-1} - """ - return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels)) - - -def get_accuracy(logits, labels): - """ - Calculates accuracy of predictions. Softmax based on largest. - INPUTS: - - logits: (tensor.2d) logit probability values. - - labels: (array of ints) basically, label \in {0,...,L-1} - """ - pred_labels = tf.argmax(logits, 1) - correct_pred = tf.equal(pred_labels, labels) - accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) - return accuracy - - -def get_optimizer(cost, lr=0.001, decay=1.0, epoch_every=10): - """ - Creates an optimizer based on learning rate and loss. - We will use Adam optimizer. This may have to change in the future. - INPUTS: - - cost: (tf value) usually sum of L2 loss and CE loss - - lr: (float) the learning rate. - - decay: (float) how much to decay each epoch. - - epoch_every: (int) how many iterations is an epoch. - """ - global_step = tf.Variable(0, trainable=False) - starter_learning_rate = float(lr) - learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, - epoch_every, decay, staircase=True) - optimizer = tf.train.AdamOptimizer( - learning_rate).minimize(cost, global_step=global_step) - return optimizer - - -def Alex_conv(layer, b_name=""): - """ - The convolution part of the classic Alex Net. - Everything has been hardcoded to show example of use. - INPUT: - - layer: (tensor.4d) input tensor. - - b_name: (string) branch name. If not doing branch, doesn't matter. - """ - conv1 = conv2d(layer, 11, 96, stride=4, name=b_name + "conv1") - pool1 = max_pool(conv1, k=2) - conv2 = conv2d(pool1, 11, 256, name=b_name + "conv2") - pool2 = max_pool(conv2, k=2) - conv3 = conv2d(pool2, 3, 384, name=b_name + "conv3") - conv4 = conv2d(conv3, 3, 384, name=b_name + "conv4") - conv5 = conv2d(conv3, 3, 256, name=b_name + "conv5") - pool5 = max_pool(conv5, k=2) - return pool5 - - -def general_conv(layer, architecture_conv, b_name="", norm="bn"): - """ - A generalized convolution block that takes an architecture. - INPUTS: - - layer: (tensor.4d) input tensor. 
- - architecture_conv: (list of lists) - [[filt_size, filt_num, stride], ..., [0, poolSize], - [filt_size, filt_num, stride], ..., [0, poolSize], - ...] - - b_name: (string) branch name. If not doing branch, doesn't matter. - """ - for conv_iter, conv_numbers in enumerate(architecture_conv): - if conv_numbers[0] == 0: - layer = max_pool(layer, k=conv_numbers[1]) - else: - if len(conv_numbers) == 2: - conv_numbers.append(1) - layer = conv2d(layer, conv_numbers[0], conv_numbers[1], stride=conv_numbers[2], - name=(b_name + "conv" + str(conv_iter)), norm=norm) - return layer - - -def GoogLe_conv(layer, b_name="", norm="bn"): - """ - This should be the convolution layers of the GoogLe net. - We follow the v1 architecture as laid out by - http://www.cs.unc.edu/~wliu/papers/GoogLeNet.pdf - INPUTS: - - layer: (tensor.4d) input tensor - - b_name: (string) branch name, if necessary. - - norm: (string) which normalization to use. - """ - conv1 = conv2d(layer, 7, 64, stride=2, name=b_name + "conv1", norm=norm) - pool1 = max_pool(conv1, k=3, stride=2) - conv2a = conv2d(pool1, 1, 64, name=b_name + "conv2a", norm=norm) - conv2b = conv2d(conv2a, 3, 192, name=b_name + "conv2b", norm=norm) - pool2 = max_pool(conv2b, k=3, stride=2) - incept3a = incept( - pool2, kSize=[64, 96, 128, 16, 32, 32], name=b_name + "incept3a", norm=norm) - incept3b = incept(incept3a, kSize=[ - 128, 128, 192, 32, 96, 64], name=b_name + "incept3b", norm=norm) - pool3 = max_pool(incept3b, k=3, stride=2) - incept4a = incept( - pool3, kSize=[192, 96, 208, 16, 48, 64], name=b_name + "incept4a", norm=norm) - incept4b = incept(incept4a, kSize=[ - 160, 112, 224, 24, 64, 64], name=b_name + "incept4b", norm=norm) - incept4c = incept(incept4b, kSize=[ - 128, 128, 256, 24, 64, 64], name=b_name + "incept4c", norm=norm) - incept4d = incept(incept4c, kSize=[ - 112, 144, 288, 32, 64, 64], name=b_name + "incept4d", norm=norm) - incept4e = incept(incept4d, kSize=[ - 256, 160, 320, 32, 128, 128], name=b_name + "incept4e", norm=norm) - pool4 = max_pool(incept4e, k=3, stride=2) - incept5a = incept( - pool4, kSize=[256, 160, 320, 32, 128, 128], name=b_name + "incept5a", norm=norm) - incept5b = incept(incept5a, kSize=[ - 384, 192, 384, 48, 128, 128], name=b_name + "incept5b", norm=norm) - size_pool = incept5b.get_shape().as_list()[1] - pool5 = tf.nn.avg_pool(incept5b, ksize=[1, size_pool, size_pool, 1], strides=[ - 1, 1, 1, 1], padding='VALID') - return pool5 - - -def Le_Net(X, output_size, keep_prob=1.0, name=""): - """ - Very Simple Lenet - INPUTS: - - X: (tensor.4d) input tensor. - - output_size: (int) number of classes we're predicting - - keep_prob: (float) probability to keep during dropout. should be 0.4 at train. - """ - layer = X - conv1 = conv2d(layer, 5, 6, stride=1, name=name + "conv1", norm="bn") - pool1 = max_pool(conv1, k=2, stride=2) - conv2 = conv2d(pool1, 3, 16, stride=1, name=name + "conv2", norm="bn") - pool2 = max_pool(conv2, k=2, stride=2) - dense1 = dense(pool2, 120, keep_prob, name=name + "dense1") - return output(dense1, output_size, name=name + "output") - - -def GoogLe_Net(X, output_size, keep_prob=1.0, name=""): - """ - This is the famous GoogLeNet incarnation of the inception network. - All the power is in the convs, so this is quite simple. - INPUTS: - - X: (tensor.4d) input tensor. - - output_size: (int) number of classes we're predicting - - keep_prob: (float) probability to keep during dropout. should be 0.4 at train. 
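    The model is GoogLe_conv feature extraction, dropout on the pooled
    features, then a single affine output layer producing the class logits.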
- """ - layer = GoogLe_conv(X, b_name=name) - drop1 = tf.nn.dropout(layer, keep_prob) - return output(layer, output_size, name=name + "output") - - -def Alex_Net(X, output_size, keep_prob=1.0, name=""): - """ - The classic alex net architecture. - INPUTS: - - X: (tensor.4d) A tensor with dimensions (none, width, height, num_channels) - - output_size: (int) The number of classes there are. - - keep_prob: (float) Chance of keeping a neuron during dropout. - """ - layer = X - layer = Alex_conv(layer, b_name=name) - dense1 = dense(layer, 4096, keep_prob, name=name + "dense1") - dense2 = dense(dense1, 4096, keep_prob, name=name + "dense2") - return output(dense2, output_size, name=name + "output") - - -def VGG16_Net(X, output_size, keep_prob=1.0): - """ - The classic VGG16 net architecture. - INPUTS: - - X: (tensor.4d) A tensor with dimensions (none, width, height, num_channels) - - output_size: (int) The number of classes there are. - - keep_prob: (float) Chance of keeping a neuron during dropout. - """ - architecture_conv = [[3, 64], [3, 64], [0, 2], - [3, 128], [3, 128], [0, 2], - [3, 256], [3, 256], [3, 256], [0, 2], - [3, 512], [3, 512], [3, 512], [0, 2], - [3, 512], [3, 512], [3, 512], [0, 2]] - layer = general_conv(X, architecture_conv, b_name=name) - layer = dense(layer, 4096, keep_prob, name=name + "dense1") - layer = dense(layer, 4096, keep_prob, name=name + "dense2") - return output(layer, output_size, name=name + "output") - - -def test_out(sess, list_dims, list_placeholders, list_operations, X_te, opts): - """ - This code is to call a test on the validation set. - INPUTS: - - sess: (tf session) the session to run everything on - - list_dim: (list of ints) list of dimensions - - list_placeholders: (list of tensors) list of the placeholders for feed_dict - - list_operations: (list of tensors) list of operations for graph access - - X_tr: (list of strings) list of training sample names - - opts: (parsed arguments) - """ - # Let's unpack the lists - matrix_size, num_channels = list_dims - x, y, keep_prob = list_placeholders - prob, pred, saver, L2_loss, CE_loss, cost, optimizer, accuracy, init = list_operations - # Initializing what to put in. - dataXX = np.zeros((1, matrix_size, matrix_size, - num_channels), dtype=np.float32) - # Running through the images. - f = open(opts.outtxt, 'w') - statement = 'subjectID' + '\t' + 'laterality' + '\t' + 'prediction' - super_print(statement, f) - for iter_data in range(len(X_te)): - id_iter, lat_iter, img_iter = X_te[iter_data] - dataXX[0, :, :, 0] = read_in_one_image(opts.path_data, img_iter, matrix_size) - tflearn.is_training(False) - pred_iter = sess.run(prob, feed_dict={x: dataXX, keep_prob: 1.0}) - statement = id_iter + '\t' + lat_iter + '\t' + str(pred_iter[0][1]) - super_print(statement, f) - #left_img, right_img = X_te[iter_data] - #dataXX[0, :, :, 0] = read_in_one_image(opts.path_data, left_img, matrix_size) - #tflearn.is_training(False) - #pred_left = sess.run(pred, feed_dict={x: dataXX, keep_prob: 1.0}) - #dataXX[0, :, :, 0] = read_in_one_image(opts.path_data, right_img, matrix_size) - #pred_right = sess.run(pred, feed_dict={x: dataXX, keep_prob: 1.0}) - #statement = str(pred_left) + '\t' + str(pred_right) - #super_print(statement, f) - f.close() - - -def test_all(sess, list_dims, list_placeholders, list_operations, X_te, Y_te, opts): - """ - This code is to call a test on the validation set. 
- INPUTS: - - sess: (tf session) the session to run everything on - - list_dims: (list of ints) list of dimensions - - list_placeholders: (list of tensors) list of the placeholders for feed_dict - - list_operations: (list of tensors) list of operations for graph access - - X_te: (list of strings) list of validation sample names - - Y_te: (list of ints) list of labels for validation samples - - opts: (parsed arguments) - """ - # Let's unpack the lists. - matrix_size, num_channels = list_dims - x, y, keep_prob = list_placeholders - prob, pred, saver, L2_loss, CE_loss, cost, optimizer, accuracy, init = list_operations - # Initializing what to put in. - loss_te = 0.0 - acc_te = 0.0 - dataXX = np.zeros((1, matrix_size, matrix_size, - num_channels), dtype=np.float32) - dataYY = np.zeros((1, ), dtype=np.int64) - # Running through all test data points - v_TP = 0.0 - v_FP = 0.0 - v_FN = 0.0 - v_TN = 0.0 - for iter_data in range(len(X_te)): - # Reading in the data - dataXX[0, :, :, 0] = read_in_one_image( - opts.path_data, X_te[iter_data], matrix_size) - dataYY[0] = Y_te[iter_data] - tflearn.is_training(False) - loss_iter, acc_iter = sess.run((cost, accuracy), feed_dict={ - x: dataXX, y: dataYY, keep_prob: 1.0}) - # Figuring out the ROC stuff - if Y_te[iter_data] == 1: - if acc_iter == 1: - v_TP += 1.0 / len(X_te) - else: - v_FN += 1.0 / len(X_te) - else: - if acc_iter == 1: - v_TN += 1.0 / len(X_te) - else: - v_FP += 1.0 / len(X_te) - # Adding to total accuracy and loss - loss_te += loss_iter / len(X_te) - acc_te += acc_iter / len(X_te) - return (loss_te, acc_te, [v_TP, v_FP, v_TN, v_FN]) - - -def train_one_iteration(sess, list_dims, list_placeholders, list_operations, X_tr, Y_tr, opts): - """ - Basically, run one iteration of the training. - INPUTS: - - sess: (tf session) the session to run everything on - - list_dims: (list of ints) list of dimensions - - list_placeholders: (list of tensors) list of the placeholders for feed_dict - - list_operations: (list of tensors) list of operations for graph access - - X_tr: (list of strings) list of training sample names - - Y_tr: (list of ints) list of labels for training samples - - opts: (parsed arguments) - """ - # Let's unpack the lists. - matrix_size, num_channels = list_dims - x, y, keep_prob = list_placeholders - prob, pred, saver, L2_loss, CE_loss, cost, optimizer, accuracy, init = list_operations - # Initializing what to put in. - dataXX = np.zeros((opts.bs, matrix_size, matrix_size, - num_channels), dtype=np.float32) - dataYY = np.zeros((opts.bs, ), dtype=np.int64) - ind_list = np.random.choice(range(len(X_tr)), opts.bs, replace=False) - # Fill in our dataXX and dataYY for training one batch. - for iter_data, ind in enumerate(ind_list): - dataXX[iter_data, :, :, 0] = read_in_one_image(opts.path_data, X_tr[ind], matrix_size, data_aug=False) - dataYY[iter_data] = Y_tr[ind] - tflearn.is_training(True) - _, loss_iter, acc_iter = sess.run((optimizer, cost, accuracy), feed_dict={ - x: dataXX, y: dataYY, keep_prob: opts.dropout}) - return (loss_iter, acc_iter) - - -def train_net(X_tr, X_te, Y_tr, Y_te, opts, f): - """ - Training of the net. All we need is data names and parameters. - INPUTS: - - X_tr: (list of strings) training image names - - X_te: (list of strings) validation image names - - Y_tr: (list of ints) training labels - - Y_te: (list of ints) validation labels - - opts: (parsed arguments) - - f: (opened file) for output writing - """ - # Setting the size and number of channels of input. 
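    # Bookkeeping below: with, say, 5000 training images, batch size 10 and
    # 10 epochs, iter_count = ceil(10 * 5000 / 10) = 5000 total iterations,
    # epoch_every = ceil(5000 / 10) = 500 iterations per epoch, and validation
    # runs every min(100, 500) = 100 iterations.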
- matrix_size = opts.matrix_size - num_channels = 1 - list_dims = [matrix_size, num_channels] - # Finding out other constant values to be used. - data_count = len(X_tr) - iter_count = int(np.ceil(float(opts.epoch) * data_count / opts.bs)) - epoch_every = int(np.ceil(float(iter_count) / opts.epoch)) - print_every = min([100, epoch_every]) - max_val_acc = 0.0 - super_print("User specified device: " + opts.device, f) - with tf.device(opts.device): - # Creating Placeholders - x = tf.placeholder( - tf.float32, [None, matrix_size, matrix_size, num_channels]) - y = tf.placeholder(tf.int64) - keep_prob = tf.placeholder(tf.float32) - list_placeholders = [x, y, keep_prob] - # Create the network - if opts.net == "Alex": - pred = Alex_Net(x, 2, keep_prob=keep_prob) - elif opts.net == "Le": - pred = Le_Net(x, 2, keep_prob=keep_prob) - elif opts.net == "VGG16": - pred = VGG16_Net(x, 2, keep_prob=keep_prob) - elif opts.net == "GoogLe": - pred = GoogLe_Net(x, 2, keep_prob=keep_prob) - else: - statement = "Please specify valid network (e.g. Alex, VGG16, GoogLe)." - super_print(statement, f) - return 0 - # Define Operations in TF Graph - saver = tf.train.Saver() - L2_loss = get_L2_loss(opts.reg) - CE_loss = get_CE_loss(pred, y) - cost = L2_loss + CE_loss - prob = tf.nn.softmax(pred) - optimizer = get_optimizer( - cost, lr=opts.lr, decay=opts.decay, epoch_every=epoch_every) - accuracy = get_accuracy(pred, y) - init = tf.initialize_all_variables() - list_operations = [prob, pred, saver, L2_loss, - CE_loss, cost, optimizer, accuracy, init] - # Do the Training - print("Training Started...") - start_time = time.time() - config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True) - with tf.Session(config=config) as sess: - sess.run(init) - loss_tr = 0.0 - acc_tr = 0.0 - if opts.test: - saver.restore(sess, opts.saver) - test_out(sess, list_dims, list_placeholders, - list_operations, X_te, opts) - return 0 - for iter in range(iter_count): - loss_temp, acc_temp = train_one_iteration( - sess, list_dims, list_placeholders, list_operations, X_tr, Y_tr, opts) - loss_tr += loss_temp / print_every - acc_tr += acc_temp / print_every - if ((iter) % print_every) == 0: - current_time = time.time() - loss_te, acc_te, ROC_values = test_all( - sess, list_dims, list_placeholders, list_operations, X_te, Y_te, opts) - # Printing out stuff - statement = " Iter" + \ - str(iter + 1) + ": " + \ - str((current_time - start_time) / 60) - statement += ", Acc_tr: " + str(acc_tr) - statement += ", Acc_val: " + str(acc_te) - statement += ", Loss_tr: " + str(loss_tr) - statement += ", Loss_val: " + str(loss_te) - super_print(statement, f) - statement = " True_Positive: " + str(ROC_values[0]) - statement += ", False_Positive: " + str(ROC_values[1]) - statement += ", True_Negative: " + str(ROC_values[2]) - statement += ", False_Negative: " + str(ROC_values[3]) - super_print(statement, f) - loss_tr = 0.0 - acc_tr = 0.0 - if acc_te > max_val_acc: - max_val_acc = acc_te - saver.save(sess, opts.saver) - if (current_time - start_time) / 60 > opts.time: - break - statement = "Best you could do: " + str(max_val_acc) - super_print(statement, f) - return 0 - - -def main(args): - """ - Main Function to do deep learning using tensorflow on pilot. - INPUTS: - - args: (list of strings) command line arguments - """ - # Setting up reading of command line options, storing defaults if not - # provided. 
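    # A typical invocation with these flags (cf. train.sh):
    #   python DREAM_DM_starter_tf.py --lr 0.0001 --reg 0.0001 --decay 0.985 \
    #       --bs 50 --time 240 --net GoogLe --ms 224 --dropout 0.6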
- parser = argparse.ArgumentParser(description="Do deep learning!") - parser.add_argument("--pf", dest="path_data", - type=str, default="/trainingData") - parser.add_argument("--csv1", dest="csv1", type=str, - default="/metadata/images_crosswalk.tsv") - parser.add_argument("--csv2", dest="csv2", type=str, - default="/metadata/exams_metadata.tsv") - parser.add_argument("--csv3", dest="csv3", type=str, - default="/scoringData/image_metadata.tsv") - parser.add_argument("--net", dest="net", type=str, default="GoogLe") - parser.add_argument("--lr", dest="lr", type=float, default=0.001) - parser.add_argument("--reg", dest="reg", type=float, default=0.00001) - parser.add_argument("--out", dest="output", type=str, - default="/modelState/out_train.txt") - parser.add_argument("--outtxt", dest="outtxt", - type=str, default="/output/out.txt") - parser.add_argument("--saver", dest="saver", type=str, - default="/modelState/model.ckpt") - parser.add_argument("--decay", dest="decay", type=float, default=1.0) - parser.add_argument("--dropout", dest="dropout", type=float, default=0.5) - parser.add_argument("--bs", dest="bs", type=int, default=10) - parser.add_argument("--epoch", dest="epoch", type=int, default=10) - parser.add_argument("--test", dest="test", type=int, default=0) - parser.add_argument("--ms", dest="matrix_size", type=int, default=224) - parser.add_argument("--time", dest="time", type=float, default=1000000) - parser.add_argument("--device", dest="device", type=str, default="/gpu:0") - parser.add_argument("--seed", dest="seed", type=int, default=1) - opts = parser.parse_args(args[1:]) - # Setting up the output file. - if isfile(opts.output): - remove(opts.output) - f = open(opts.output, 'w') - # Finding list of data. - statement = "Parsing the csv's." - super_print(statement, f) - path_csv_crosswalk = opts.csv1 - path_csv_metadata = opts.csv2 - path_csv_test = opts.csv1 - if opts.test: - X_tr, X_te, Y_tr, Y_te = create_test_splits(path_csv_test) - else: - # X_tr, X_te, Y_tr, Y_te = create_data_splits( - # path_csv_crosswalk, path_csv_metadata) - X_tr, X_te, Y_tr, Y_te = create_data_splits2( - path_csv_crosswalk, path_csv_metadata, opts.seed, f) - # Train a network and print a bunch of information. - statement = "Let's start the training!" - super_print(statement, f) - statement = "Network: " + opts.net + ", Dropout: " + \ - str(opts.dropout) + ", Reg: " + str(opts.reg) + \ - ", LR: " + str(opts.lr) + ", Decay: " + str(opts.decay) - super_print(statement, f) - train_net(X_tr, X_te, Y_tr, Y_te, opts, f) - f.close() - return 0 - -if __name__ == '__main__' and __package__ is None: - from os import sys, path - sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) - sys.path.append(path.dirname(path.abspath(__file__))) - main(sys.argv) diff --git a/training/tf_example/Dockerfile_tf_example b/training/tf_example/Dockerfile_tf_example deleted file mode 100644 index afa5f29..0000000 --- a/training/tf_example/Dockerfile_tf_example +++ /dev/null @@ -1,14 +0,0 @@ -FROM tensorflow/tensorflow:0.10.0-gpu -MAINTAINER Darvin Yi - -# Prepare for the Digital Mammography DREAM Challenge -RUN pip install --upgrade pip -RUN pip install pydicom -RUN pip install -U scikit-learn -RUN pip install tflearn #git+https://github.com/tflearn/tflearn.git - -WORKDIR / -COPY DREAM_DM_starter_tf.py . -COPY train.sh . -COPY test.sh . -COPY score_sc1.sh . 
\ No newline at end of file diff --git a/training/tf_example/train.sh b/training/tf_example/train.sh deleted file mode 100755 index 0575402..0000000 --- a/training/tf_example/train.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -# -# Digital Mammography DREAM Challenge -# Training inference method - -echo "==== nvidia-smi command output ====" -nvidia-smi -echo "==== python version ====" -python -V -echo "==== tensorflow package info ====" -pip show tensorflow - - -# Run training -echo "==== Start training ====" -python DREAM_DM_starter_tf.py --lr 0.0001 --reg 0.0001 --decay 0.985 --bs 50 --time 240 --net GoogLe --ms 224 --dropout 0.6 diff --git a/training/tf_example/train_local.sh b/training/tf_example/train_local.sh deleted file mode 100755 index 4333485..0000000 --- a/training/tf_example/train_local.sh +++ /dev/null @@ -1,17 +0,0 @@ -python DREAM_DM_starter_tf.py \ - --lr 0.0001 \ - --decay 0.985 \ - --bs 100 \ - --epoch 1 \ - --net GoogLe \ - --ms 224 \ - --dropout 0.6 \ - --pf ./trainingData \ - --csv1 ./metadata/images_crosswalk.tsv \ - --csv2 ./metadata/exams_metadata.tsv \ - --out ./modelState/out_train.txt \ - --outtxt ./output/out.txt \ - --saver ./modelState/model.ckpt - -# One liner. -# python DREAM_DM_starter_tf.py --lr 0.0001 --decay 0.985 --bs 100 --epoch 1 --net GoogLe --ms 224 --dropout 0.6 --pf ./trainingData --csv1 ./metadata/images_crosswalk.tsv --csv2 ./metadata/exams_metadata.tsv --out ./modelState/out_train.txt --outtxt ./output/out.txt --saver ./modelState/model.ckpt diff --git a/training/train.sh b/training/train.sh deleted file mode 100755 index 22df9ac..0000000 --- a/training/train.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -export PYTHONPATH=/:$PYTHONPATH - -IMG_CW_TSV="/metadata/images_crosswalk.tsv" -EXAM_TSV="/metadata/exams_metadata.tsv" -IMG_FOLDER="/trainingData" -IMG_EXT="dcm" -BEST_MODEL="/modelState/dmresnet56rb6_1152_bestAuc_model.h5" -# BEST_MODEL="NOSAVE" -FINAL_MODEL="NOSAVE" - - -python dm_resnet_train.py \ - --img-extension $IMG_EXT \ - --img-size 1152 896 \ - --img-tsv $IMG_CW_TSV \ - --exam-tsv $EXAM_TSV \ - --multi-view \ - --val-size 5000 \ - --featurewise-norm \ - --batch-size 8 \ - --samples-per-epoch 64000 \ - --nb-epoch 10 \ - --lr-patience 2 \ - --es-patience 10 \ - --balance-classes 1.0 \ - --allneg-skip \ - --pos-class-weight 1.0 \ - --net dmresnet56rb6 \ - --nb-init-filter 16 \ - --init-filter-size 3 \ - --init-conv-stride 2 \ - --max-pooling-size 2 \ - --max-pooling-stride 2 \ - --weight-decay 0.0001 \ - --alpha 0.0001 \ - --l1-ratio 0.0 \ - --inp-dropout 0.0 \ - --hidden-dropout 0.0 \ - --init-learningrate 0.01 \ - --best-model $BEST_MODEL \ - --final-model $FINAL_MODEL \ - $IMG_FOLDER diff --git a/training/train_bow.sh b/training/train_bow.sh deleted file mode 100755 index 77934a3..0000000 --- a/training/train_bow.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/bin/bash - -export PYTHONPATH=/:$PYTHONPATH -IMG_CW_TSV="/metadata/images_crosswalk.tsv" -EXAM_TSV="/metadata/exams_metadata.tsv" -IMG_FOLDER="/trainingData" -IMG_EXT="dcm" -DL_STATE="/resnet50_candidROI_bestAuc_model.h5" -ROI_STATE="/roi_clf4.h5" -KM_STATE="/modelState/dlrepr_km_model.pkl" -BOW_TRAIN_OUT="/modelState/bow_dat_train.pkl" -BOW_TEST_OUT="/modelState/bow_dat_test.pkl" - -echo -n "Start training: " && date -echo - -python dm_bow_train.py \ - --img-extension $IMG_EXT \ - --img-height 1024 \ - --img-scale 4095 \ - --val-size 80 \ - --featurewise-norm \ - --featurewise-mean 873.6\ - --featurewise-std 739.3 \ - --img-per-batch 2 \ - --roi-per-img 16 \ - --roi-size 256 256 
\ - --low-int-threshold 0.05 \ - --blob-min-area 3 \ - --blob-min-int 0.5 \ - --blob-max-int 0.85 \ - --blob-th-step 10 \ - --roi-state $ROI_STATE \ - --roi-clf-bs 64 \ - --nb-pos-samples 384 \ - --nb-neg-samples 1536 \ - --no-aug-for-neg \ - --sample-per-pos 4 \ - --sample-per-neg 2 \ - --dl-clf-bs 64 \ - --nb-words 512 \ - --km-max-iter 100 \ - --km-bs 10 \ - --km-patience 100 \ - --km-init 30 \ - --exam-neg-vs-pos-ratio 4.0 \ - --img-tsv $IMG_CW_TSV \ - --exam-tsv $EXAM_TSV \ - --km-state $KM_STATE \ - --bow-train-out $BOW_TRAIN_OUT \ - --bow-test-out $BOW_TEST_OUT \ - $IMG_FOLDER $DL_STATE - -echo -echo -n "End training: " && date - - - - - - - - - - - - - diff --git a/training/train_bow_local.sh b/training/train_bow_local.sh deleted file mode 100755 index a554644..0000000 --- a/training/train_bow_local.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/bin/bash - -# export PYTHONPATH=/:$PYTHONPATH -IMG_CW_TSV="./metadata/images_crosswalk.tsv" -EXAM_TSV="./metadata/exams_metadata.tsv" -IMG_FOLDER="./trainingData" -IMG_EXT="dcm" -DL_STATE="modelState/resnet50_candidROI_local_bestAuc_model8.h5" -ROI_STATE="./modelState/2017-02-02_roi_256/roi_clf4.h5" -KM_STATE="./modelState/dlrepr_km_model.pkl" -BOW_TRAIN_OUT="./modelState/bow_dat_train.pkl" -BOW_TEST_OUT="./modelState/bow_dat_test.pkl" - -echo -n "Start training: " && date -echo - -python dm_bow_train.py \ - --img-extension $IMG_EXT \ - --img-height 1024 \ - --img-scale 4095 \ - --val-size 0.3 \ - --featurewise-norm \ - --featurewise-mean 918.6 \ - --featurewise-std 735.2 \ - --img-per-batch 2 \ - --roi-per-img 16 \ - --roi-size 256 256 \ - --low-int-threshold 0.05 \ - --blob-min-area 3 \ - --blob-min-int 0.5 \ - --blob-max-int 0.85 \ - --blob-th-step 10 \ - --roi-state $ROI_STATE \ - --roi-clf-bs 32 \ - --nb-pos-samples 1040 \ - --nb-neg-samples 4160 \ - --aug-for-neg \ - --sample-per-pos 8 \ - --sample-per-neg 4 \ - --dl-clf-bs 32 \ - --nb-words 4 \ - --km-max-iter 100 \ - --km-bs 200 \ - --km-patience 30 \ - --km-init 30 \ - --exam-neg-vs-pos-ratio 4.0 \ - --img-tsv $IMG_CW_TSV \ - --exam-tsv $EXAM_TSV \ - --km-state $KM_STATE \ - --bow-train-out $BOW_TRAIN_OUT \ - --bow-test-out $BOW_TEST_OUT \ - $IMG_FOLDER $DL_STATE - -echo -echo -n "End training: " && date - - - - - - - - - - - - - diff --git a/training/train_candidROI.sh b/training/train_candidROI.sh deleted file mode 100755 index 492a886..0000000 --- a/training/train_candidROI.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/bash - -export PYTHONPATH=/:$PYTHONPATH -IMG_CW_TSV="/metadata/images_crosswalk.tsv" -EXAM_TSV="/metadata/exams_metadata.tsv" -IMG_FOLDER="/trainingData" -IMG_EXT="dcm" -BEST_MODEL="/modelState/resnet50_candidROI_bestAuc_model.h5" -RESUME_FROM="/roi_clf4.h5" -ROI_STATE="/roi_clf4.h5" -FINAL_MODEL="NOSAVE" - -python dm_candidROI_train.py \ - --img-extension $IMG_EXT \ - --img-height 1024 \ - --img-scale 4095 \ - --featurewise-norm \ - --norm-fit-size 20 \ - --net resnet50 \ - --resume-from $RESUME_FROM \ - --val-size 0.1 \ - --loadval-ram \ - --img-per-batch 4 \ - --roi-per-img 16 \ - --roi-size 256 256 \ - --low-int-threshold 0.05 \ - --blob-min-area 3 \ - --blob-min-int 0.5 \ - --blob-max-int 0.85 \ - --blob-th-step 10 \ - --roi-state $ROI_STATE \ - --clf-bs 64 \ - --patches-per-epoch 1280 \ - --nb-epoch 20 \ - --lr-patience 3 \ - --es-patience 10 \ - --allneg-skip 0.8 \ - --pos-class-weight 5.0 \ - --nb-init-filter 32 \ - --init-filter-size 5 \ - --init-conv-stride 2 \ - --max-pooling-size 2 \ - --max-pooling-stride 2 \ - --weight-decay 0.0001 \ - --alpha 0.0001 \ - 
diff --git a/training/train_candidROI.sh b/training/train_candidROI.sh
deleted file mode 100755
index 492a886..0000000
--- a/training/train_candidROI.sh
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/bin/bash
-
-export PYTHONPATH=/:$PYTHONPATH
-IMG_CW_TSV="/metadata/images_crosswalk.tsv"
-EXAM_TSV="/metadata/exams_metadata.tsv"
-IMG_FOLDER="/trainingData"
-IMG_EXT="dcm"
-BEST_MODEL="/modelState/resnet50_candidROI_bestAuc_model.h5"
-RESUME_FROM="/roi_clf4.h5"
-ROI_STATE="/roi_clf4.h5"
-FINAL_MODEL="NOSAVE"
-
-python dm_candidROI_train.py \
- --img-extension $IMG_EXT \
- --img-height 1024 \
- --img-scale 4095 \
- --featurewise-norm \
- --norm-fit-size 20 \
- --net resnet50 \
- --resume-from $RESUME_FROM \
- --val-size 0.1 \
- --loadval-ram \
- --img-per-batch 4 \
- --roi-per-img 16 \
- --roi-size 256 256 \
- --low-int-threshold 0.05 \
- --blob-min-area 3 \
- --blob-min-int 0.5 \
- --blob-max-int 0.85 \
- --blob-th-step 10 \
- --roi-state $ROI_STATE \
- --clf-bs 64 \
- --patches-per-epoch 1280 \
- --nb-epoch 20 \
- --lr-patience 3 \
- --es-patience 10 \
- --allneg-skip 0.8 \
- --pos-class-weight 5.0 \
- --nb-init-filter 32 \
- --init-filter-size 5 \
- --init-conv-stride 2 \
- --max-pooling-size 2 \
- --max-pooling-stride 2 \
- --weight-decay 0.0001 \
- --alpha 0.0001 \
- --l1-ratio 0.0 \
- --inp-dropout 0.0 \
- --hidden-dropout 0.0 \
- --init-learningrate 0.01 \
- --best-model $BEST_MODEL \
- --img-tsv $IMG_CW_TSV \
- --exam-tsv $EXAM_TSV \
- --final-model $FINAL_MODEL \
- $IMG_FOLDER
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/training/train_candidROI_local.sh b/training/train_candidROI_local.sh
deleted file mode 100755
index 669025c..0000000
--- a/training/train_candidROI_local.sh
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/bin/bash
-
-# export PYTHONPATH=/:$PYTHONPATH
-IMG_CW_TSV="./metadata/images_crosswalk.tsv"
-EXAM_TSV="./metadata/exams_metadata.tsv"
-IMG_FOLDER="./trainingData"
-IMG_EXT="dcm"
-BEST_MODEL="./modelState/resnet50_candidROI_local_bestAuc_model.h5"
-RESUME_FROM="./modelState/2017-02-02_roi_256/roi_clf4.h5"
-ROI_STATE="./modelState/2017-02-02_roi_256/roi_clf4.h5"
-FINAL_MODEL="NOSAVE"
-
-python dm_candidROI_train.py \
- --img-extension $IMG_EXT \
- --img-height 1024 \
- --img-scale 4095 \
- --featurewise-norm \
- --norm-fit-size 10 \
- --net resnet50 \
- --resume-from $RESUME_FROM \
- --val-size 0.3 \
- --loadval-ram \
- --img-per-batch 2 \
- --roi-per-img 16 \
- --roi-size 256 256 \
- --low-int-threshold 0.05 \
- --blob-min-area 3 \
- --blob-min-int 0.5 \
- --blob-max-int 0.85 \
- --blob-th-step 10 \
- --roi-state $ROI_STATE \
- --clf-bs 32 \
- --patches-per-epoch 12800 \
- --nb-epoch 20 \
- --lr-patience 1 \
- --es-patience 10 \
- --allneg-skip 0.8 \
- --pos-class-weight 5.0 \
- --nb-init-filter 32 \
- --init-filter-size 5 \
- --init-conv-stride 2 \
- --max-pooling-size 2 \
- --max-pooling-stride 2 \
- --weight-decay 0.0001 \
- --alpha 0.0001 \
- --l1-ratio 0.0 \
- --inp-dropout 0.0 \
- --hidden-dropout 0.0 \
- --init-learningrate 0.01 \
- --best-model $BEST_MODEL \
- --img-tsv $IMG_CW_TSV \
- --exam-tsv $EXAM_TSV \
- --final-model $FINAL_MODEL \
- $IMG_FOLDER
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/training/train_docker_local.sh b/training/train_docker_local.sh
deleted file mode 100644
index ec1ebb6..0000000
--- a/training/train_docker_local.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-nvidia-docker run --rm --name resnet \
- -v /mnt/disk3/proj/DREAM2016_dm/DREAM2016_dm/training/metadata:/metadata \
- -v /mnt/disk3/proj/DREAM2016_dm/DREAM2016_dm/training/trainingData:/trainingData \
- -v /mnt/disk3/proj/DREAM2016_dm/DREAM2016_dm/training/preprocessedData:/preprocessedData \
- -v /mnt/disk3/proj/DREAM2016_dm/DREAM2016_dm/training/modelState:/modelState \
- -v /mnt/disk3/proj/DREAM2016_dm/DREAM2016_dm/training/scratch:/scratch \
- docker.synapse.org/syn7890435/dm-ls-train-dl:im288mv_net50_bs32 /train_small.sh
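Note: train_docker_local.sh repeats the same long host prefix in every mount. The variant below is the same invocation with the prefix factored into a shell variable; the image tag, mount points, and entry script are exactly those named in the deleted file.

#!/bin/bash
# train_docker_local.sh with the repeated mount prefix factored out.
BASE=/mnt/disk3/proj/DREAM2016_dm/DREAM2016_dm/training
nvidia-docker run --rm --name resnet \
    -v "$BASE/metadata":/metadata \
    -v "$BASE/trainingData":/trainingData \
    -v "$BASE/preprocessedData":/preprocessedData \
    -v "$BASE/modelState":/modelState \
    -v "$BASE/scratch":/scratch \
    docker.synapse.org/syn7890435/dm-ls-train-dl:im288mv_net50_bs32 /train_small.sh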
diff --git a/training/train_enet.sh b/training/train_enet.sh
deleted file mode 100755
index 9827d57..0000000
--- a/training/train_enet.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-
-export PYTHONPATH=/:$PYTHONPATH
-IMG_CW_TSV="/metadata/images_crosswalk.tsv"
-EXAM_TSV="/metadata/exams_metadata.tsv"
-IMG_FOLDER="/trainingData"
-# IMG_FOLDER="./preprocessedData/png_288x224"
-IMG_EXT="dcm"
-# IMG_EXT="png"
-BEST_MODEL="/modelState/enet_288_bestAuc_model.pkl"
-# FINAL_MODEL="./modelState/enet_288_final_model.pkl"
-FINAL_MODEL="NOSAVE"
-# SAVED_MODEL="./modelState/2017-01-11_resnet50_288/resnet50_288_bestAuc_model.h5"
-DL_STATE="/resnet50_288_bestAuc_model_4.h5"
-
-echo -n "Start training: " && date
-echo
-
-# --resume-from $SAVED_MODEL \
-python dm_enet_train.py \
- --img-extension $IMG_EXT \
- --img-size 288 224 \
- --img-tsv $IMG_CW_TSV \
- --exam-tsv $EXAM_TSV \
- --multi-view \
- --val-size 7500 \
- --featurewise-norm \
- --featurewise-mean 485.9 \
- --featurewise-std 765.2 \
- --batch-size 32 \
- --samples-per-epoch 64000 \
- --nb-epoch 10 \
- --balance-classes 1.0 \
- --allneg-skip 1.0 \
- --pos-class-weight 1.0 \
- --alpha 0.01 \
- --l1-ratio 0.5 \
- --init-learningrate 0.1 \
- --lr-patience 2 \
- --es-patience 4 \
- --dl-state $DL_STATE \
- --best-model $BEST_MODEL \
- --final-model $FINAL_MODEL \
- $IMG_FOLDER
-
-echo
-echo -n "End training: " && date
diff --git a/training/train_enet_local.sh b/training/train_enet_local.sh
deleted file mode 100755
index 75f2b92..0000000
--- a/training/train_enet_local.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/bash
-
-# export PYTHONPATH=/:$PYTHONPATH
-IMG_CW_TSV="./metadata/images_crosswalk.tsv"
-EXAM_TSV="./metadata/exams_metadata.tsv"
-IMG_FOLDER="./trainingData"
-# IMG_FOLDER="./preprocessedData/png_288x224"
-IMG_EXT="dcm"
-# IMG_EXT="png"
-BEST_MODEL="./modelState/enet_288_local_bestAuc_model.h5"
-FINAL_MODEL="./modelState/enet_288_local_final_model.h5"
-# FINAL_MODEL="NOSAVE"
-# SAVED_MODEL="./modelState/2017-01-11_resnet50_288/resnet50_288_bestAuc_model.h5"
-DL_STATE="./modelState/2017-01-13_resnet50_288/resnet50_288_bestAuc_model.h5"
-
- # --resume-from $SAVED_MODEL \
-echo -n "Start training: " && date
-echo
-
-python dm_enet_train.py \
- --img-extension $IMG_EXT \
- --img-size 288 224 \
- --img-tsv $IMG_CW_TSV \
- --exam-tsv $EXAM_TSV \
- --multi-view \
- --val-size 0.3 \
- --featurewise-norm \
- --featurewise-mean 485.9 \
- --featurewise-std 765.2 \
- --batch-size 8 \
- --samples-per-epoch 160 \
- --nb-epoch 50 \
- --balance-classes 1.0 \
- --allneg-skip 1.0 \
- --pos-class-weight 1.0 \
- --alpha 0.01 \
- --l1-ratio 0.5 \
- --power-t 0.75 \
- --init-learningrate 0.1 \
- --dl-state $DL_STATE \
- --best-model $BEST_MODEL \
- --final-model $FINAL_MODEL \
- $IMG_FOLDER
-
-echo
-echo -n "End training: " && date
diff --git a/training/train_local.sh b/training/train_local.sh
deleted file mode 100755
index 4761bad..0000000
--- a/training/train_local.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/bash
-
-# export PYTHONPATH=/:$PYTHONPATH
-IMG_CW_TSV="./metadata/images_crosswalk.tsv"
-EXAM_TSV="./metadata/exams_metadata.tsv"
-IMG_FOLDER="./preprocessedData/png_288x224"
-IMG_EXT="png"
-BEST_MODEL="./modelState/resnet18_288noMV_local_bestAuc_model.h5"
-# FINAL_MODEL="./modelState/resnet18_288noMV_local_final_model.h5"
-FINAL_MODEL="NOSAVE"
-
-python dm_resnet_train.py \
- --img-extension $IMG_EXT \
- --img-size 288 224 \
- --img-tsv $IMG_CW_TSV \
- --exam-tsv $EXAM_TSV \
- --no-multi-view \
- --val-size 0.2 \
- --featurewise-norm \
- --batch-size 8 \
- --samples-per-epoch 160 \
- --nb-epoch 1 \
- --balance-classes 1.0 \
- --allneg-skip \
- --pos-class-weight 1.0 \
- --net resnet18 \
- --nb-init-filter 16 \
- --init-filter-size 3 \
- --init-conv-stride 2 \
- --max-pooling-size 2 \
- --max-pooling-stride 2 \
- --weight-decay 0.0001 \
- --alpha 0.0001 \
- --l1-ratio 0.0 \
- --inp-dropout 0.0 \
- --hidden-dropout 0.0 \
- --init-learningrate 0.01 \
- --lr-patience 10 \
- --es-patience 30 \
- --best-model $BEST_MODEL \
- --final-model $FINAL_MODEL \
- $IMG_FOLDER
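Note: judging by its flags, dm_enet_train.py fits an elastic-net model on top of a frozen deep network (--dl-state), so --alpha and --l1-ratio are its main tuning knobs. A hypothetical grid search over those two flags is sketched below; paths and fixed settings mirror train_enet_local.sh, the epoch count is cut down for speed, and looping this way is a suggestion rather than part of the original pipeline.

#!/bin/bash
# Hypothetical alpha/l1-ratio grid for dm_enet_train.py (not in the repo).
DL_STATE="./modelState/2017-01-13_resnet50_288/resnet50_288_bestAuc_model.h5"
for ALPHA in 0.001 0.01 0.1; do
  for L1 in 0.0 0.5 1.0; do
    python dm_enet_train.py \
        --img-extension dcm \
        --img-size 288 224 \
        --img-tsv ./metadata/images_crosswalk.tsv \
        --exam-tsv ./metadata/exams_metadata.tsv \
        --multi-view \
        --val-size 0.3 \
        --featurewise-norm \
        --featurewise-mean 485.9 \
        --featurewise-std 765.2 \
        --batch-size 8 \
        --samples-per-epoch 160 \
        --nb-epoch 5 \
        --alpha "$ALPHA" \
        --l1-ratio "$L1" \
        --init-learningrate 0.1 \
        --dl-state "$DL_STATE" \
        --best-model "./modelState/enet_a${ALPHA}_l1${L1}.h5" \
        --final-model NOSAVE \
        ./trainingData
  done
done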
diff --git a/training/train_roi_clf.sh b/training/train_roi_clf.sh
deleted file mode 100755
index dea7881..0000000
--- a/training/train_roi_clf.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/bash
-
-export PYTHONPATH=/:$PYTHONPATH
-
-X_TRAIN="referenceData/MIAS/X_train.npy"
-X_TEST="referenceData/MIAS/X_test.npy"
-Y_TRAIN="referenceData/MIAS/y_train.npy"
-Y_TEST="referenceData/MIAS/y_test.npy"
-SAVED_MODEL="modelState/2017-02-02_roi_256/roi_clf.h5"
-BEST_MODEL="modelState/2017-02-02_roi_256/roi_clf2.h5"
-# BEST_MODEL="NOSAVE"
-FINAL_MODEL="NOSAVE"
-# SAVED_MODEL="/resnet50_288_bestAuc_model.h5"
-
-echo -n "Start training: " && date
-echo
-
-python roi_clf_train.py \
- --img-size 256 256 \
- --featurewise-norm \
- --rotation-range 0 \
- --width-shift-range 0.0 \
- --height-shift-range 0.0 \
- --zoom-range 1.0 1.0 \
- --horizontal-flip \
- --vertical-flip \
- --batch-size 32 \
- --nb-epoch 100 \
- --lr-patience 10 \
- --es-patience 40 \
- --pos-class-weight 5.0 \
- --nb-init-filter 32 \
- --init-filter-size 5 \
- --init-conv-stride 2 \
- --max-pooling-size 2 \
- --max-pooling-stride 2 \
- --weight-decay 0.0001 \
- --alpha 0.0001 \
- --l1-ratio 0.0 \
- --inp-dropout 0.0 \
- --hidden-dropout 0.0 \
- --init-learningrate 0.001 \
- --resume-from $SAVED_MODEL \
- --best-model $BEST_MODEL \
- --final-model $FINAL_MODEL \
- $X_TRAIN $X_TEST $Y_TRAIN $Y_TEST
-
-echo
-echo -n "End training: " && date
-
-
-
diff --git a/training/train_small.sh b/training/train_small.sh
deleted file mode 100755
index 3a21a71..0000000
--- a/training/train_small.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/bash
-
-export PYTHONPATH=/:$PYTHONPATH
-
-IMG_CW_TSV="/metadata/images_crosswalk.tsv"
-EXAM_TSV="/metadata/exams_metadata.tsv"
-IMG_FOLDER="/trainingData"
-IMG_EXT="dcm"
-BEST_MODEL="/modelState/dummy_model.h5"
-FINAL_MODEL="NOSAVE"
-
-
-python dm_resnet_train.py \
- --img-extension $IMG_EXT \
- --img-size 288 224 \
- --img-tsv $IMG_CW_TSV \
- --exam-tsv $EXAM_TSV \
- --multi-view \
- --val-size 0.2 \
- --featurewise-norm \
- --batch-size 8 \
- --samples-per-epoch 80 \
- --nb-epoch 1 \
- --lr-patience 3 \
- --es-patience 10 \
- --balance-classes 1.0 \
- --allneg-skip \
- --pos-class-weight 1.0 \
- --net resnet50 \
- --nb-init-filter 16 \
- --init-filter-size 3 \
- --init-conv-stride 2 \
- --max-pooling-size 2 \
- --max-pooling-stride 2 \
- --weight-decay 0.0001 \
- --alpha 0.0001 \
- --l1-ratio 0.0 \
- --inp-dropout 0.0 \
- --hidden-dropout 0.0 \
- --init-learningrate 0.01 \
- --best-model $BEST_MODEL \
- --final-model $FINAL_MODEL \
- $IMG_FOLDER
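Note: train_small.sh is the reduced-size run used as a container sanity check (a dummy model output, 80 samples per epoch, one epoch). A thin wrapper that captures its output to a timestamped log is sketched below; the log path under /modelState is a convention suggested here, not something the challenge requires.

#!/bin/bash
# Run the sanity-check script and keep a timestamped log (suggested only).
LOG="/modelState/train_small_$(date +%Y%m%d_%H%M%S).log"
echo -n "Start training: " && date
bash /train_small.sh 2>&1 | tee "$LOG"
echo -n "End training: " && date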