# examples/nas/fbnet/datasets.py
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Image/annotation augmentation helpers and the PFLD landmark dataset.

Annotations are flat sequences laid out as
``[x_min, y_min, x_max, y_max, x0, y0, x1, y1, ...]``.

OpenCV (``cv2``) is imported inside the functions that need it so the
pure-NumPy augmentations stay usable when OpenCV is not installed.
"""

import os
import random
import sys

import numpy as np

from torch.utils import data
from torch.utils.data import DataLoader


def flip(img, annotation):
    """Mirror an image horizontally together with its annotation.

    Args:
        img: image array of shape (H, W[, C]).
        annotation: flat sequence ``[x_min, y_min, x_max, y_max, x0, y0, ...]``.

    Returns:
        ``(flipped_img, new_annotation)`` where ``new_annotation`` is a new
        list; the input annotation is left untouched.
    """
    img = np.fliplr(img).copy()
    w = img.shape[1]
    x_min, y_min, x_max, y_max = annotation[0:4]

    # Mirroring swaps the roles of the left and right box edges.
    new_annotation = [w - x_max, y_min, w - x_min, y_max]
    for x, y in zip(annotation[4::2], annotation[5::2]):
        new_annotation.append(w - x)
        new_annotation.append(y)

    return img, new_annotation


def channel_shuffle(img, annotation):
    """Randomly permute the channels of a 3-channel image.

    Images that are not (H, W, 3) are returned unchanged.
    """
    if img.ndim == 3 and img.shape[2] == 3:
        ch_arr = [0, 1, 2]
        np.random.shuffle(ch_arr)
        img = img[..., ch_arr]
    return img, annotation


def random_noise(img, annotation, limit=(0, 0.2), p=0.5):
    """With probability ``p`` add per-pixel uniform noise to the image.

    Args:
        img: image array of shape (H, W, C).
        annotation: returned unchanged.
        limit: ``(low, high)`` noise range as a fraction of 255.
        p: probability of applying the noise.
    """
    if random.random() < p:
        H, W = img.shape[:2]
        noise = np.random.uniform(limit[0], limit[1], size=(H, W)) * 255

        # The same noise value is added to every channel of a pixel.
        img = img + noise[:, :, np.newaxis] * np.array([1, 1, 1])
        img = np.clip(img, 0, 255).astype(np.uint8)

    return img, annotation


def random_brightness(img, annotation, brightness=0.3):
    """Scale pixel intensities by a random factor in ``1 +/- brightness``."""
    alpha = 1 + np.random.uniform(-brightness, brightness)
    img = alpha * img
    img = np.clip(img, 0, 255).astype(np.uint8)
    return img, annotation


def random_contrast(img, annotation, contrast=0.3):
    """Blend the image with its weighted mean intensity by a random factor."""
    # Luma weights applied per channel in the order (0.114, 0.587, 0.299).
    coef = np.array([[[0.114, 0.587, 0.299]]])
    alpha = 1.0 + np.random.uniform(-contrast, contrast)
    gray = img * coef
    # Scalar: (1 - alpha) times the mean weighted intensity over all pixels.
    gray = (3.0 * (1.0 - alpha) / gray.size) * np.sum(gray)
    img = alpha * img + gray
    img = np.clip(img, 0, 255).astype(np.uint8)
    return img, annotation


def random_saturation(img, annotation, saturation=0.5):
    """Blend the image with its per-pixel gray value by a random factor."""
    # NOTE(review): the channel weights are in the reverse order of those in
    # random_contrast, and alpha is drawn from [-saturation, saturation]
    # (not 1 +/- saturation) -- confirm both are intended.
    coef = np.array([[[0.299, 0.587, 0.114]]])
    alpha = np.random.uniform(-saturation, saturation)
    gray = img * coef
    gray = np.sum(gray, axis=2, keepdims=True)
    img = alpha * img + (1.0 - alpha) * gray
    img = np.clip(img, 0, 255).astype(np.uint8)
    return img, annotation


def random_hue(image, annotation, hue=0.5):
    """Shift the hue channel by a random offset (modulo 180)."""
    import cv2

    h = int(np.random.uniform(-hue, hue) * 180)

    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hsv[:, :, 0] = (hsv[:, :, 0].astype(int) + h) % 180
    image = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
    return image, annotation


def scale(img, annotation):
    """Resize the image by a random factor and scale the annotation with it.

    Returns:
        ``(resized_img, new_annotation)``; ``new_annotation`` is a new list.
    """
    import cv2

    # NOTE(review): the factor is drawn from [-0.4, 0.8]; non-positive draws
    # give a non-positive target size -- confirm the intended range.
    f_xy = np.random.uniform(-0.4, 0.8)
    origin_h, origin_w = img.shape[:2]

    h, w = int(origin_h * f_xy), int(origin_w * f_xy)
    # cv2.resize expects the target size as (width, height).
    image = cv2.resize(img, (w, h)).astype(np.uint8)

    new_annotation = [value * f_xy for value in annotation[0:4]]
    for x, y in zip(annotation[4::2], annotation[5::2]):
        new_annotation.append(x * f_xy)
        new_annotation.append(y * f_xy)

    return image, new_annotation


def rotate(img, annotation, alpha=30):
    """Rotate the image by ``alpha`` degrees around the bounding-box center.

    The returned bounding box is the axis-aligned extent of the rotated
    landmarks, followed by the rotated landmarks themselves.
    """
    import cv2

    bbox = annotation[0:4]
    center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
    rot_mat = cv2.getRotationMatrix2D(center, alpha, 1)
    img_rotated_by_alpha = cv2.warpAffine(img, rot_mat, (img.shape[1], img.shape[0]))

    # Apply the 2x3 affine matrix to every landmark once.
    new_point_x = []
    new_point_y = []
    for x, y in zip(annotation[4::2], annotation[5::2]):
        new_point_x.append(rot_mat[0][0] * x + rot_mat[0][1] * y + rot_mat[0][2])
        new_point_y.append(rot_mat[1][0] * x + rot_mat[1][1] * y + rot_mat[1][2])

    new_annotation = [
        min(new_point_x),
        min(new_point_y),
        max(new_point_x),
        max(new_point_y),
    ]
    for x, y in zip(new_point_x, new_point_y):
        new_annotation.append(x)
        new_annotation.append(y)

    return img_rotated_by_alpha, new_annotation


class PFLDDatasets(data.Dataset):
    """Dataset backed by a text list file, one sample per line.

    Each line is whitespace separated: the image path, then
    ``2 * num_landmarks`` landmark coordinates, then the remaining values,
    which are returned as the Euler angles.

    Args:
        file_list: path of the list file.
        transforms: optional callable applied to the resized image.
        data_root: optional directory prepended to every image path.
        img_size: images are resized to ``(img_size, img_size)``.
        num_landmarks: number of (x, y) landmark pairs per line.
    """

    def __init__(
        self, file_list, transforms=None, data_root="", img_size=112, num_landmarks=106
    ):
        self.line = None
        self.path = None
        self.img_size = img_size
        self.num_landmarks = num_landmarks
        self.landmarks = None
        self.landmark = None
        self.filenames = None
        self.euler_angle = None
        self.data_root = data_root
        self.transforms = transforms
        with open(file_list, "r") as f:
            self.lines = f.readlines()

    def __getitem__(self, index):
        """Return ``(image, landmarks, euler_angles)`` for sample ``index``.

        Raises:
            FileNotFoundError: if the image cannot be read.
        """
        import cv2

        self.line = self.lines[index].strip().split()
        # load image
        if self.data_root:
            img_path = os.path.join(self.data_root, self.line[0])
        else:
            img_path = self.line[0]
        self.img = cv2.imread(img_path)
        if self.img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError("cannot read image: {}".format(img_path))
        # resize
        self.img = cv2.resize(self.img, (self.img_size, self.img_size))
        # obtain gt labels
        split = self.num_landmarks * 2 + 1
        self.landmark = np.asarray(self.line[1:split], dtype=np.float32)
        self.euler_angle = np.asarray(self.line[split:], dtype=np.float32)

        # augmentation
        if self.transforms:
            self.img = self.transforms(self.img)
        return self.img, self.landmark, self.euler_angle

    def __len__(self):
        """Return the number of lines read from the list file."""
        return len(self.lines)
a/examples/nas/fbnet/lib/builder.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -from __future__ import absolute_import, division, print_function - -import gc -import os -import time -import timeit -import torch - -import numpy as np - -from lib.ops import PRIMITIVES -from lib.utils import count_model_flops, model_init - -LUT_FILE = "lut.npy" - - -def supernet_sample(model, state_dict, sampled_arch=[], lookup_table=None): - """Initialize the searched sub-model from supernet.""" - replace = list() - stage_names = [stage_name for stage_name in lookup_table.layer_num] - stage_lnum = [ - lookup_table.layer_num[stage_name] for stage_name in stage_names - ] - - if sampled_arch: - layer_id = 0 - for i, stage_name in enumerate(stage_names): - ops_names = [ - op_name for op_name in lookup_table.lut_ops[stage_name] - ] - for j in range(stage_lnum[i]): - searched_op = sampled_arch[layer_id] - layer_id += 1 - op_i = ops_names.index(searched_op) - replace.append( - ["nas_stages_{}.{}.".format(i, j), ".op.", ".ops.{}.".format(op_i)] - ) - - model_init(model, state_dict, replace=replace) - - -def sub_arch_sample(model, lookup_table, logger): - """ Sample the ops names for the sub-network.""" - stage_names = [stage_name for stage_name in lookup_table.layer_num] - stage_lnum = [ - lookup_table.layer_num[stage_name] for stage_name in stage_names - ] - - # get the op idx in each layer - arch_idxs = list() - layer_id = 0 - for theta_param in get_parameters(model, [BLOCK_THETA], mode='include'): - theta_np = theta_param.detach().cpu().numpy() - op_idx = np.argmax(theta_np) - arch_idxs.append(op_idx) - logger.info("layer {}: {}, index: {}".format(layer_id, theta_np, op_idx)) - layer_id += 1 - - # get the arch_sample - arch_operations = list() - layer_id = 0 - for i, stage_name in enumerate(stage_names): - ops_names = [ - op_name for op_name in lookup_table.lut_ops[stage_name] - ] - for j in range(stage_lnum[i]): - 
searched_op = ops_names[arch_idxs[layer_id]] - arch_operations.append(searched_op) - layer_id += 1 - - logger.info(arch_operations) - return arch_operations - - -class LookUpTable: - """Build look-up table for NAS.""" - - def __init__(self, config): - # definition of search blocks and space - self.search_space = config.search_space - # layers for NAS - self.cnt_layers = len(self.search_space["input_shape"]) - # constructors for each operation - self.lut_ops = { - stage_name: { - op_name: PRIMITIVES[op_name] - for op_name in self.search_space["stages"][stage_name]["ops"] - } for stage_name in self.search_space["stages"] - } - self.layer_num = { - stage_name: self.search_space["stages"][stage_name]["layer_num"] - for stage_name in self.search_space["stages"] - } - - # arguments for the ops constructors, input_shapes just for convinience - ( - self.layers_params, - self.layers_input_shapes, - ) = self._generate_layers_params() - - # lookup_table - self.perf_metric = config.perf_metric - - if config.lut_en: - self.lut_perf = None - self.lut_file = os.path.join(config.lut_path, LUT_FILE) - if config.lut_load: - self._create_from_file() - else: - self._create_from_operations() - - def _generate_layers_params(self): - """Generate basic params for different layers.""" - # layers_params are : c_in, c_out, stride, fm_size - layers_params = [ - [ - self.search_space["input_shape"][layer_id][0], - self.search_space["channel_size"][layer_id], - self.search_space["strides"][layer_id], - self.search_space["fm_size"][layer_id], - ] - for layer_id in range(self.cnt_layers) - ] - - # layers_input_shapes are (C_in, input_w, input_h) - layers_input_shapes = self.search_space["input_shape"] - - return layers_params, layers_input_shapes - - def _create_from_operations(self, cnt_of_runs=200): - """Create performance cost for each op.""" - if self.perf_metric == "latency": - self.lut_perf = self._calculate_latency(cnt_of_runs) - elif self.perf_metric == "flops": - self.lut_perf = 
self._calculate_flops() - - self._write_lookup_table_to_file() - - def _calculate_flops(self, eps=0.001): - """FLOPs cost.""" - flops_table_layer_by_ops = [{} for i in range(self.cnt_layers)] - layer_id = 0 - - for stage_name in self.lut_ops: - stage_ops = self.lut_ops[stage_name] - ops_num = self.layer_num[stage_name] - - for _ in range(ops_num): - for op_name in stage_ops: - layer_param = self.layers_params[layer_id] - key_params = {"fm_size": layer_param[3]} - op = stage_ops[op_name](*layer_param[0:3], **key_params) - - # measured in micro-second - flops = count_model_flops(op, self.layers_input_shapes[layer_id]) - flops = eps if flops == 0.0 else flops - flops_table_layer_by_ops[layer_id][op_name] = float(flops) - layer_id += 1 - - return flops_table_layer_by_ops - - def _calculate_latency(self, cnt_of_runs): - """Latency cost.""" - LATENCY_BATCH_SIZE = 1 - latency_table_layer_by_ops = [{} for i in range(self.cnt_layers)] - layer_id = 0 - - for stage_name in self.lut_ops: - stage_ops = self.lut_ops[stage_name] - ops_num = self.layer_num[stage_name] - - for _ in range(ops_num): - for op_name in stage_ops: - layer_param = self.layers_params[layer_id] - key_params = {"fm_size": layer_param[3]} - op = stage_ops[op_name](*layer_param[0:3], **key_params) - input_sample = torch.randn( - (LATENCY_BATCH_SIZE, *self.layers_input_shapes[layer_id]) - ) - globals()["op"], globals()["input_sample"] = op, input_sample - total_time = timeit.timeit( - "output = op(input_sample)", - setup="gc.enable()", - globals=globals(), - number=cnt_of_runs, - ) - # measured in micro-second - latency_table_layer_by_ops[layer_id][op_name] = ( - total_time / cnt_of_runs / LATENCY_BATCH_SIZE * 1e6 - ) - layer_id += 1 - - return latency_table_layer_by_ops - - def _write_lookup_table_to_file(self): - """Save lut as numpy file.""" - np.save(self.lut_file, self.lut_perf) - - def _create_from_file(self): - """Load numpy file.""" - self.lut_perf = np.load(self.lut_file, allow_pickle=True) diff --git 
a/examples/nas/fbnet/lib/config.py b/examples/nas/fbnet/lib/config.py deleted file mode 100644 index cf332bb5a18..00000000000 --- a/examples/nas/fbnet/lib/config.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -from __future__ import absolute_import, division, print_function - -import os - -import numpy as np - -LUT_PATH = "lut" - - -search_space = { - # multi-stage definition for candidate layers - "stages": { - "stage_0": { - "ops": [ - "mb_k3_res", - "mb_k3_e2_res", - "mb_k3_res_d3", - "mb_k5_res", - "mb_k5_e2_res", - "sep_k3", - "sep_k5", - "gh_k3", - "gh_k5", - ], - "layer_num": 2, - }, - - "stage_1": { - "ops": [ - "mb_k3_e2_res", - "mb_k3_e4_res", - "mb_k3_e2_res_se", - "mb_k3_res_d3", - "mb_k5_res", - "mb_k5_e2_res", - "mb_k5_res_se", - "mb_k5_e2_res_se", - "gh_k5", - ], - "layer_num": 3, - }, - }, - - # necessary information of layers for NAS - "input_shape": [ - (32, 14, 14), - (32, 14, 14), - (32, 14, 14), - (64, 7, 7), - (64, 7, 7), - ], - "channel_size": [32, 32, 64, 64, 64], - "strides": [1, 1, 2, 1, 1], - "fm_size": [14, 14, 7, 7, 7], -} - - -class NASConfig: - - def __init__( - self, - perf_metric='flops', - lut_load=False, - arch_search=True, - model_dir=None, - nas_lr=0.01, - nas_weight_decay=5e-4, - mode='mul', - alpha=0.18, - beta=0.6, - start_epoch=50, - init_temperature=5.0, - exp_anneal_rate=np.exp(-0.045), - search_space=None, - ): - # LUT of performance metric - self.perf_metric = perf_metric - assert perf_metric in ['flops', 'latency'], "perf_metric should be ['flops', 'latency']" - # wether load or create lut file - self.lut_load = lut_load - self.arch_search = arch_search - # necessary dirs - self.lut_en = model_dir is not None - if self.lut_en: - self.model_dir = model_dir - os.makedirs(model_dir, exist_ok=True) - self.lut_path = os.path.join(model_dir, LUT_PATH) - os.makedirs(self.lut_path, exist_ok=True) - # NAS learning setting - self.nas_lr = nas_lr - 
self.nas_weight_decay = nas_weight_decay - # hardware-aware loss setting - self.mode = mode - self.alpha = alpha - self.beta = beta - # NAS training setting - self.start_epoch = start_epoch - self.init_temperature = init_temperature - self.exp_anneal_rate = exp_anneal_rate - # definition of search blocks and space - self.search_space = search_space diff --git a/examples/nas/fbnet/lib/ops.py b/examples/nas/fbnet/lib/ops.py deleted file mode 100644 index 78add840ae8..00000000000 --- a/examples/nas/fbnet/lib/ops.py +++ /dev/null @@ -1,377 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -from __future__ import absolute_import, division, print_function - -import torch - -import numpy as np -import torch.nn as nn -import torch.nn.functional as F - - -# Basic primitives as the network path -PRIMITIVES = { - "skip": lambda c_in, c_out, stride, **kwargs: Identity( - c_in, c_out, stride, **kwargs - ), - "conv1x1": lambda c_in, c_out, stride, **kwargs: Conv1x1( - c_in, c_out, stride, **kwargs - ), - "depth_conv": lambda c_in, c_out, stride, **kwargs: DepthConv( - c_in, c_out, stride, **kwargs - ), - "sep_k3": lambda c_in, c_out, stride, **kwargs: SeparableConv( - c_in, c_out, stride, **kwargs - ), - "sep_k5": lambda c_in, c_out, stride, **kwargs: SeparableConv( - c_in, c_out, stride, kernel=5, **kwargs - ), - "gh_k3": lambda c_in, c_out, stride, **kwargs: GhostModule( - c_in, c_out, stride, **kwargs - ), - "gh_k5": lambda c_in, c_out, stride, **kwargs: GhostModule( - c_in, c_out, stride, kernel=5, **kwargs - ), - "mb_k3": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, kernel=3, expand=1, **kwargs - ), - "mb_k3_e2": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, kernel=3, expand=2, **kwargs - ), - "mb_k3_e4": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, kernel=3, expand=4, **kwargs - ), - "mb_k3_res": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, 
kernel=3, expand=1, res=True, **kwargs - ), - "mb_k3_e2_res": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, kernel=3, expand=2, res=True, **kwargs - ), - "mb_k3_e4_res": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, kernel=3, expand=4, res=True, **kwargs - ), - "mb_k3_d2": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, kernel=3, expand=2, res=False, dilation=2, **kwargs - ), - "mb_k3_d3": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, kernel=3, expand=2, res=False, dilation=3, **kwargs - ), - "mb_k3_res_d2": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, kernel=3, expand=2, res=True, dilation=2, **kwargs - ), - "mb_k3_res_d3": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, kernel=3, expand=2, res=True, dilation=3, **kwargs - ), - "mb_k3_res_se": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, kernel=3, expand=1, res=True, dilation=1, se=True, **kwargs - ), - "mb_k3_e2_res_se": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, kernel=3, expand=2, res=True, dilation=1, se=True, **kwargs - ), - "mb_k3_e4_res_se": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, kernel=3, expand=4, res=True, dilation=1, se=True, **kwargs - ), - "mb_k5": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, kernel=5, expand=1, **kwargs - ), - "mb_k5_e2": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, kernel=5, expand=2, **kwargs - ), - "mb_k5_res": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, kernel=5, expand=1, res=True, **kwargs - ), - "mb_k5_e2_res": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, kernel=5, expand=2, res=True, **kwargs - ), - "mb_k5_res_se": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, kernel=5, expand=1, res=True, dilation=1, se=True, **kwargs - ), - 
"mb_k5_e2_res_se": lambda c_in, c_out, stride, **kwargs: MBBlock( - c_in, c_out, stride, kernel=5, expand=2, res=True, dilation=1, se=True, **kwargs - ), -} - - -def conv_bn(inp, oup, kernel, stride, padding=1, groups=1): - return nn.Sequential( - nn.Conv2d(inp, oup, kernel, stride, padding, groups=groups, bias=False), - nn.BatchNorm2d(oup), - nn.ReLU(inplace=True), - ) - - -class SeparableConv(nn.Module): - """Separable convolution.""" - - def __init__(self, in_ch, out_ch, stride=1, kernel=3, fm_size=7): - super(SeparableConv, self).__init__() - assert stride in [1, 2], "stride should be in [1, 2]" - padding = kernel // 2 - - self.conv = nn.Sequential( - conv_bn(in_ch, in_ch, kernel, stride, padding=padding, groups=in_ch), - conv_bn(in_ch, out_ch, 1, 1, padding=0), - ) - - def forward(self, x): - return self.conv(x) - - - -class Conv1x1(nn.Module): - """1x1 convolution.""" - - def __init__(self, in_ch, out_ch, stride=1, kernel=1, fm_size=7): - super(Conv1x1, self).__init__() - assert stride in [1, 2], "stride should be in [1, 2]" - padding = kernel // 2 - - self.conv = nn.Sequential( - nn.Conv2d(in_ch, out_ch, kernel, stride, padding), - nn.ReLU(inplace=True), - ) - - def forward(self, x): - return self.conv(x) - - -class DepthConv(nn.Module): - """depth convolution.""" - - def __init__(self, in_ch, out_ch, stride=1, kernel=3, fm_size=7): - super(DepthConv, self).__init__() - assert stride in [1, 2], "stride should be in [1, 2]" - padding = kernel // 2 - - self.conv = nn.Sequential( - nn.Conv2d(in_ch, in_ch, kernel, stride, padding, groups=in_ch), - nn.ReLU(inplace=True), - nn.Conv2d(in_ch, out_ch, 1, 1, 0), - nn.ReLU(inplace=True), - ) - - def forward(self, x): - return self.conv(x) - - -class GhostModule(nn.Module): - """Gost module.""" - def __init__(self, in_ch, out_ch, stride=1, kernel=3, fm_size=7): - super(GhostModule, self).__init__() - mid_ch = out_ch // 2 - self.primary_conv = conv_bn(in_ch, mid_ch, 1, stride, padding=0) - self.cheap_operation = conv_bn( 
- mid_ch, mid_ch, kernel, 1, kernel // 2, mid_ch - ) - - def forward(self, x): - x1 = self.primary_conv(x) - x2 = self.cheap_operation(x1) - return torch.cat([x1, x2], dim=1) - - -class StemBlock(nn.Module): - def __init__(self, in_ch=3, init_ch=32, bottleneck=True): - super(StemBlock, self).__init__() - self.stem_1 = conv_bn(in_ch, init_ch, 3, 2, 1) - mid_ch = int(init_ch // 2) if bottleneck else init_ch - self.stem_2a = conv_bn(init_ch, mid_ch, 1, 1, 0) - self.stem_2b = SeparableConv(mid_ch, init_ch, 2, 1) - self.stem_2p = nn.MaxPool2d(kernel_size=2, stride=2) - self.stem_3 = conv_bn(init_ch * 2, init_ch, 1, 1, 0) - - def forward(self, x): - stem_1_out = self.stem_1(x) - - stem_2a_out = self.stem_2a(stem_1_out) - stem_2b_out = self.stem_2b(stem_2a_out) - - stem_2p_out = self.stem_2p(stem_1_out) - - out = self.stem_3(torch.cat((stem_2b_out, stem_2p_out), 1)) - return out, stem_1_out - - -class Identity(nn.Module): - """ Identity module.""" - - def __init__(self, in_ch, out_ch, stride=1, fm_size=7): - super(Identity, self).__init__() - self.conv = ( - conv_bn(in_ch, out_ch, kernel=1, stride=stride, padding=0) - if in_ch != out_ch or stride != 1 - else None - ) - - def forward(self, x): - if self.conv: - out = self.conv(x) - else: - out = x - # Add dropout to avoid overfit on Identity (PDARTS) - out = nn.functional.dropout(out, p=0.5) - return out - - -class Hsigmoid(nn.Module): - """Hsigmoid activation function.""" - - def __init__(self, inplace=True): - super(Hsigmoid, self).__init__() - self.inplace = inplace - - def forward(self, x): - return F.relu6(x + 3.0, inplace=self.inplace) / 6.0 - - -class eSEModule(nn.Module): - """ The improved SE Module.""" - - def __init__(self, channel, fm_size=7, se=True): - super(eSEModule, self).__init__() - self.se = se - - if self.se: - self.avg_pool = nn.Conv2d( - channel, channel, fm_size, 1, 0, groups=channel, bias=False - ) - self.fc = nn.Conv2d(channel, channel, kernel_size=1, padding=0) - self.hsigmoid = Hsigmoid() - - 
def forward(self, x): - if self.se: - input = x - x = self.avg_pool(x) - x = self.fc(x) - x = self.hsigmoid(x) - return input * x - else: - return x - - -class ChannelShuffle(nn.Module): - """Procedure: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W].""" - - def __init__(self, groups): - super(ChannelShuffle, self).__init__() - self.groups = groups - - def forward(self, x): - if self.groups == 1: - return x - - N, C, H, W = x.size() - g = self.groups - assert C % g == 0, "Incompatible group size {} for input channel {}".format( - g, C - ) - return ( - x.view(N, g, int(C // g), H, W) - .permute(0, 2, 1, 3, 4) - .contiguous() - .view(N, C, H, W) - ) - - -class MBBlock(nn.Module): - """The Inverted Residual Block, with channel shuffle or eSEModule.""" - - def __init__( - self, - in_ch, - out_ch, - stride=1, - kernel=3, - expand=1, - res=False, - dilation=1, - se=False, - fm_size=7, - group=1, - mid_ch=-1, - ): - super(MBBlock, self).__init__() - assert stride in [1, 2], "stride should be in [1, 2]" - assert kernel in [3, 5], "kernel size should be in [3, 5]" - assert dilation in [1, 2, 3, 4], "dilation should be in [1, 2, 3, 4]" - assert group in [1, 2], "group should be in [1, 2]" - - self.use_res_connect = res and (stride == 1) - padding = kernel // 2 + (dilation - 1) - mid_ch = mid_ch if mid_ch > 0 else (in_ch * expand) - - # Basic Modules - conv_layer = nn.Conv2d - norm_layer = nn.BatchNorm2d - activation_layer = nn.ReLU - channel_suffle = ChannelShuffle - se_layer = eSEModule - - self.ir_block = nn.Sequential( - # pointwise convolution - conv_layer(in_ch, mid_ch, 1, 1, 0, bias=False, groups=group), - norm_layer(mid_ch), - activation_layer(inplace=True), - # channel shuffle if necessary - channel_suffle(group), - # depthwise convolution - conv_layer( - mid_ch, - mid_ch, - kernel, - stride, - padding=padding, - dilation=dilation, - groups=mid_ch, - bias=False, - ), - norm_layer(mid_ch), - # eSEModule if necessary - se_layer(mid_ch, fm_size, se), - 
activation_layer(inplace=True), - # pointwise convolution - conv_layer(mid_ch, out_ch, 1, 1, 0, bias=False, groups=group), - norm_layer(out_ch), - ) - - def forward(self, x): - if self.use_res_connect: - return x + self.ir_block(x) - else: - return self.ir_block(x) - - -class SingleOperation(nn.Module): - """Single operation for sampled path. - """ - - def __init__( - self, layers_params, stage_ops, sampled_op='', io_ch=[] - ): - super(SingleOperation, self).__init__() - - if io_ch: - assert len(io_ch) == 2, "io_ch should have two elements" - layers_params[0:2] = io_ch - key_params = {"fm_size": layers_params[3]} - ops_names = [op_name for op_name in stage_ops] - sampled_op = sampled_op if sampled_op else ops_names[0] - - # define the single op - self.op = stage_ops[sampled_op](*layers_params[0:3], **key_params) - - def forward(self, x): - return self.op(x) - - -def choice_blocks(layers_params, stage_ops): - """ Create list of layer candidates for NNI one-shot NAS""" - ops_names = [op_name for op_name in stage_ops] - key_params = {"fm_size": layers_params[3]} - - op_list = [ - stage_ops[op_name](*layers_params[0:3], **key_params) - for op_name in ops_names - ] - return op_list diff --git a/examples/nas/fbnet/lib/subnet.py b/examples/nas/fbnet/lib/subnet.py deleted file mode 100644 index dc9ad641a66..00000000000 --- a/examples/nas/fbnet/lib/subnet.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. 
- -from __future__ import absolute_import, division, print_function - -import math -import torch -import torch.nn as nn - -from lib.ops import ( - MBBlock, - SeparableConv, - SingleOperation, - StemBlock, - conv_bn, -) - -INIT_CH = 16 - - -class PFLDInference(nn.Module): - def __init__(self, lookup_table, sampled_ops, num_points=98): - super(PFLDInference, self).__init__() - - stage_names = [stage_name for stage_name in lookup_table.layer_num] - stage_lnum = [ - lookup_table.layer_num[stage_name] for stage_name in stage_names - ] - self.stem = StemBlock(init_ch=INIT_CH, bottleneck=False) - - self.block4_1 = MBBlock(INIT_CH, 32, stride=2, mid_ch=32) - self.nas_stages_0 = nn.ModuleList( - [ - SingleOperation( - lookup_table.layers_params[layer_id], - lookup_table.lut_ops[stage_names[0]], - sampled_ops[layer_id], - ) - for layer_id in range(stage_lnum[0]) - ] - ) - - self.nas_stages_1 = nn.ModuleList( - [ - SingleOperation( - lookup_table.layers_params[layer_id], - lookup_table.lut_ops[stage_names[1]], - sampled_ops[layer_id], - ) - for layer_id in range( - stage_lnum[0], stage_lnum[0] + stage_lnum[1] - ) - ] - ) - - self.avg_pool1 = nn.Conv2d( - INIT_CH, INIT_CH, 9, 8, 1, groups=INIT_CH, bias=False - ) - self.avg_pool2 = nn.Conv2d(32, 32, 3, 2, 1, groups=32, bias=False) - - self.block6_1 = nn.Conv2d(96 + INIT_CH, 64, 1, 1, 0, bias=False) - self.block6_2 = MBBlock(64, 64, res=True, se=True, mid_ch=128) - self.block6_3 = SeparableConv(64, 128, 1) - - self.conv7 = nn.Conv2d(128, 128, 7, 1, 0, groups=128, bias=False) - self.fc = nn.Conv2d(128, num_points * 2, 1, 1, 0, bias=True) - - def forward(self, x): - # x: 3, 112, 112 - x, y1 = self.stem(x) - out1 = x - - x = self.block4_1(x) - for i, single_op in enumerate(self.nas_stages_0): - x = single_op(x) - y2 = x - - for i, single_op in enumerate(self.nas_stages_1): - x = single_op(x) - y3 = x - - y1 = self.avg_pool1(y1) - y2 = self.avg_pool2(y2) - multi_scale = torch.cat([y3, y2, y1], 1) - - y = self.block6_1(multi_scale) - 
y = self.block6_2(y) - y = self.block6_3(y) - y = self.conv7(y) - landmarks = self.fc(y) - - return landmarks, out1 - - -class AuxiliaryNet(nn.Module): - def __init__(self): - super(AuxiliaryNet, self).__init__() - self.conv1 = conv_bn(INIT_CH, 64, 3, 2) - self.conv2 = conv_bn(64, 64, 3, 1) - self.conv3 = conv_bn(64, 32, 3, 2) - self.conv4 = conv_bn(32, 64, 7, 1) - self.max_pool1 = nn.MaxPool2d(3) - self.fc1 = nn.Linear(64, 32) - self.fc2 = nn.Linear(32, 3) - - def forward(self, x): - x = self.conv1(x) - x = self.conv2(x) - x = self.conv3(x) - x = self.conv4(x) - x = self.max_pool1(x) - x = x.view(x.size(0), -1) - x = self.fc1(x) - x = self.fc2(x) - - return x diff --git a/examples/nas/fbnet/lib/supernet.py b/examples/nas/fbnet/lib/supernet.py deleted file mode 100644 index c8e900e34b0..00000000000 --- a/examples/nas/fbnet/lib/supernet.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -from __future__ import absolute_import, division, print_function - -import math -import torch -import torch.nn as nn - -from lib.ops import ( - MBBlock, - SeparableConv, - StemBlock, - choice_blocks, - conv_bn, -) -from nni.nas.pytorch import mutables -from torch.nn import init - -INIT_CH = 16 - - -class PFLDInference(nn.Module): - def __init__(self, lookup_table, num_points=98, slice=4): - super(PFLDInference, self).__init__() - - stage_names = [stage_name for stage_name in lookup_table.layer_num] - stage_lnum = [ - lookup_table.layer_num[stage_name] for stage_name in stage_names - ] - self.stem = StemBlock(init_ch=INIT_CH, bottleneck=False) - - self.block4_1 = MBBlock(INIT_CH, 32, stride=2, mid_ch=32) - - stages_0 = [ - mutables.LayerChoice( - choice_blocks( - lookup_table.layers_params[layer_id], - lookup_table.lut_ops[stage_names[0]], - ) - ) - for layer_id in range(stage_lnum[0]) - ] - stages_1 = [ - mutables.LayerChoice( - choice_blocks( - lookup_table.layers_params[layer_id], - lookup_table.lut_ops[stage_names[1]], 
- ) - ) - for layer_id in range( - stage_lnum[0], stage_lnum[0] + stage_lnum[1] - ) - ] - blocks = stages_0 + stages_1 - self.blocks = nn.Sequential(*blocks) - - self.avg_pool1 = nn.Conv2d( - INIT_CH, INIT_CH, 9, 8, 1, groups=INIT_CH, bias=False - ) - self.avg_pool2 = nn.Conv2d(32, 32, 3, 2, 1, groups=32, bias=False) - - self.block6_1 = nn.Conv2d(96 + INIT_CH, 64, 1, 1, 0, bias=False) - self.block6_2 = MBBlock(64, 64, res=True, se=True, mid_ch=128) - self.block6_3 = SeparableConv(64, 128, 1) - - self.conv7 = nn.Conv2d(128, 128, 7, 1, 0, groups=128, bias=False) - self.fc = nn.Conv2d(128, num_points * 2, 1, 1, 0, bias=True) - - self.meta_layer = nn.Linear(num_points * 2 * slice, 1) - # init params - self.init_params() - - def init_params(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - init.kaiming_normal_(m.weight, mode='fan_out') - if m.bias is not None: - init.constant_(m.bias, 0) - elif isinstance(m, nn.BatchNorm2d): - init.constant_(m.weight, 1) - init.constant_(m.bias, 0) - elif isinstance(m, nn.Linear): - init.normal_(m.weight, std=0.001) - if m.bias is not None: - init.constant_(m.bias, 0) - - def forward(self, x, temperature, perf_cost): - # x: 3, 112, 112 - x, y1 = self.stem(x) - out1 = x - - x = self.block4_1(x) - for i, block in enumerate(self.blocks): - x, perf_cost = block(x, temperature, perf_cost) - if i == 1: - y2 = x - elif i == 4: - y3 = x - - y1 = self.avg_pool1(y1) - y2 = self.avg_pool2(y2) - multi_scale = torch.cat([y3, y2, y1], 1) - - y = self.block6_1(multi_scale) - y = self.block6_2(y) - y = self.block6_3(y) - y = self.conv7(y) - landmarks = self.fc(y) - - return landmarks, out1, perf_cost - - -class AuxiliaryNet(nn.Module): - def __init__(self): - super(AuxiliaryNet, self).__init__() - self.conv1 = conv_bn(INIT_CH, 64, 3, 2) - self.conv2 = conv_bn(64, 64, 3, 1) - self.conv3 = conv_bn(64, 32, 3, 2) - self.conv4 = conv_bn(32, 64, 7, 1) - self.max_pool1 = nn.MaxPool2d(3) - self.fc1 = nn.Linear(64, 32) - self.fc2 = 
def count_model_flops(model=None, in_shape=(3, 112, 112), multiply_adds=False):
    """Estimate the per-sample FLOPs of ``model`` with forward hooks.

    A random batch is pushed through the model once; hooks registered on the
    leaf modules accumulate an operation count for every Conv2d,
    ConvTranspose2d, Linear, BatchNorm2d, ReLU, MaxPool2d, AvgPool2d and
    Upsample layer that is executed. Other layer types contribute nothing.

    The model is switched to eval mode and left in eval mode. All hooks are
    removed again before returning.

    Parameters
    ----------
    model : torch.nn.Module
        The network to profile.
    in_shape : tuple of int
        ``(channels, height, width)`` of one input sample.
    multiply_adds : bool
        When True a multiply-accumulate is counted as two operations.

    Returns
    -------
    number or 0-dim torch.Tensor
        Accumulated operation count divided by the probe batch size. It is a
        tensor whenever a (transposed) convolution was hit, because those
        counts are derived from tensor reductions over the weights.
    """
    # Size of the random probe batch; the total is divided by it at the end.
    probe_batch = 3

    def weight_count(module):
        # non-zero + zero entries, i.e. every weight element, as a float tensor
        nonzero = (module.weight.data != 0).float().sum()
        zero = (module.weight.data == 0).float().sum()
        return nonzero + zero

    list_conv = []

    def conv_hook(self, input, output):
        batch_size = input[0].size(0)
        output_channels, output_height, output_width = output[0].size()
        bias_ops = 1 if self.bias is not None else 0

        num_weight_params = weight_count(self)
        if self.groups == 1:
            ops = num_weight_params * (2 if multiply_adds else 1)
        else:
            # NOTE(review): formula kept exactly as found; for grouped
            # convolutions the non-multiply_adds case counts only ``adds``.
            # Confirm this is intended.
            multiplys = num_weight_params / self.groups
            adds = multiplys - output_channels
            ops = (multiplys + adds) if multiply_adds else adds
        flops = (
            (ops + bias_ops * output_channels)
            * output_height
            * output_width
            * batch_size
        )
        list_conv.append(flops)

    list_deconv = []

    def deconv_hook(self, input, output):
        batch_size = input[0].size(0)
        output_channels, output_height, output_width = output[0].size()
        bias_ops = 1 if self.bias is not None else 0

        ops = weight_count(self) * (2 if multiply_adds else 1)
        flops = (
            (ops + bias_ops * output_channels)
            * output_height
            * output_width
            * batch_size
        )
        list_deconv.append(flops)

    list_linear = []

    def linear_hook(self, input, output):
        batch_size = input[0].size(0) if input[0].dim() == 2 else 1

        weight_ops = self.weight.nelement() * (2 if multiply_adds else 1)
        bias_ops = self.bias.nelement() if self.bias is not None else 0

        list_linear.append(batch_size * (weight_ops + bias_ops))

    list_bn = []

    def bn_hook(self, input, output):
        list_bn.append(input[0].nelement() * 2)

    list_relu = []

    def relu_hook(self, input, output):
        list_relu.append(input[0].nelement())

    list_pooling = []

    def pooling_hook(self, input, output):
        batch_size = input[0].size(0)
        output_channels, output_height, output_width = output[0].size()

        # kernel_size may be a single int or a (kh, kw) sequence.
        kernel = self.kernel_size
        if isinstance(kernel, (tuple, list)):
            kernel_ops = 1
            for extent in kernel:
                kernel_ops *= extent
        else:
            kernel_ops = kernel * kernel

        flops = (
            kernel_ops
            * output_channels
            * output_height
            * output_width
            * batch_size
        )
        list_pooling.append(flops)

    list_upsample = []

    # For bilinear upsample
    def upsample_hook(self, input, output):
        batch_size = input[0].size(0)
        output_channels, output_height, output_width = output[0].size()

        flops = output_height * output_width * output_channels * batch_size * 12
        list_upsample.append(flops)

    hook_table = (
        (torch.nn.Conv2d, conv_hook),
        (torch.nn.ConvTranspose2d, deconv_hook),
        (torch.nn.Linear, linear_hook),
        (torch.nn.BatchNorm2d, bn_hook),
        (torch.nn.ReLU, relu_hook),
        ((torch.nn.MaxPool2d, torch.nn.AvgPool2d), pooling_hook),
        (torch.nn.Upsample, upsample_hook),
    )

    # Hook leaf modules only; keep the handles so the hooks can be removed.
    handles = []
    for module in model.modules():
        if next(module.children(), None) is not None:
            continue
        for layer_types, hook in hook_table:
            if isinstance(module, layer_types):
                handles.append(module.register_forward_hook(hook))

    # Build the probe on the model's own device so non-CPU models work too.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    try:
        with torch.no_grad():
            probe = torch.rand(
                probe_batch, in_shape[0], in_shape[1], in_shape[2], device=device
            )
            model(probe)
    finally:
        # Never leave profiling hooks on the caller's model.
        for handle in handles:
            handle.remove()

    total_flops = (
        sum(list_conv)
        + sum(list_deconv)
        + sum(list_linear)
        + sum(list_bn)
        + sum(list_relu)
        + sum(list_pooling)
        + sum(list_upsample)
    )
    return total_flops / probe_batch


def model_init(model, state_dict, replace=None):
    """Initialize the model from state_dict.

    Keys starting with ``'module.'`` have that prefix stripped first. For
    every named sub-module whose ``<name>.weight`` or ``<name>.running_mean``
    is present, the tensors are assigned directly (no copy is made) to
    BatchNorm2d, Conv2d, Linear and ConvTranspose2d layers. A message is
    printed for each layer that is initialised or left random.

    Parameters
    ----------
    model : torch.nn.Module
        The model to fill in place.
    state_dict : dict
        Mapping from parameter/buffer name to tensor.
    replace : sequence of (pre_scope, key, replace_key), optional
        Renaming rules applied in order: when ``pre_scope`` occurs in a module
        name, ``key`` is replaced by ``replace_key`` in that name before the
        lookup.

    Raises
    ------
    ValueError
        If an element of ``replace`` does not have exactly three items.
    """
    prefix = 'module.'
    param_dict = {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }

    rules = list(replace) if replace else []
    for rule in rules:
        if len(rule) != 3:
            raise ValueError("The elements should be three.")

    for name, m in model.named_modules():
        for pre_scope, key, replace_key in rules:
            if pre_scope in name:
                name = name.replace(key, replace_key)

        # Copy the state_dict to current model
        if (name + '.weight' not in param_dict) and (
            name + '.running_mean' not in param_dict
        ):
            continue

        if isinstance(m, nn.BatchNorm2d):
            shape = m.running_mean.shape
            if shape == param_dict[name + '.running_mean'].shape:
                print('Init OK with pretrained model: {}'.format(name))
                if m.weight is not None:
                    m.weight.data = param_dict[name + '.weight']
                    m.bias.data = param_dict[name + '.bias']
                m.running_mean = param_dict[name + '.running_mean']
                m.running_var = param_dict[name + '.running_var']
            else:
                print('Init random: {}'.format(name))

        elif isinstance(m, (nn.Conv2d, nn.Linear)):
            shape = m.weight.data.shape
            if shape == param_dict[name + '.weight'].shape:
                print('Init OK with pretrained model: {}'.format(name))
                m.weight.data = param_dict[name + '.weight']
                if m.bias is not None:
                    m.bias.data = param_dict[name + '.bias']
            else:
                print('Init random: {}'.format(name))

        elif isinstance(m, nn.ConvTranspose2d):
            # NOTE(review): unlike the branches above, no shape check is done
            # here; behaviour kept as found.
            print('Init OK with pretrained model: {}'.format(name))
            m.weight.data = param_dict[name + '.weight']
            if m.bias is not None:
                m.bias.data = param_dict[name + '.bias']
class PFLDLoss(nn.Module):
    """Pose-weighted wing loss on landmarks plus a smooth-L1 pose loss.

    Parameters
    ----------
    w : float
        Absolute-error threshold below which the logarithmic branch is used.
    epsilon : float
        Curvature constant of the logarithmic branch.
    pose_weight : float
        Factor applied to the pose loss in the first returned value.
    """

    def __init__(self, w=0.12, epsilon=0.008, pose_weight=0.1):
        super(PFLDLoss, self).__init__()

        self.w = w
        self.epsilon = epsilon
        self.pose_weight = pose_weight
        # Offset that makes the two branches meet at |x| == w.
        self.c = self.w * (1.0 - math.log(1.0 + self.w / self.epsilon))

    def forward(self, targets, euler_angle_gts, angles, inputs):
        """Compute the loss.

        Parameters
        ----------
        targets : tensor
            ground-truth landmarks
        euler_angle_gts : tensor
            ground-truth pose angles; summed over dim 1 for the sample weight
        angles : tensor
            predicted pose angles
        inputs : tensor
            predicted landmarks, same shape as ``targets``

        Returns
        -------
        tensor, tensor
            ``landmark_loss + pose_weight * pose_loss`` and ``landmark_loss``
        """
        absolute_x = torch.abs(targets - inputs)

        # Per-sample weight derived from the ground-truth pose angles.
        weight_angle = torch.sum(1.5 - torch.cos(euler_angle_gts), dim=1)
        pose_loss = F.smooth_l1_loss(angles, euler_angle_gts, reduction='mean')

        losses = torch.where(
            self.w > absolute_x,
            self.w * torch.log(1.0 + absolute_x / self.epsilon),
            absolute_x - self.c,
        )
        sum_losses = torch.sum(losses, dim=1)
        loss = torch.mean(weight_angle * sum_losses, dim=0)

        return loss + self.pose_weight * pose_loss, loss
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def main(args):
    """Build the PFLD backbone, data loaders and optimizer, then run FBNetTrainer.

    Parameters
    ----------
    args : argparse.Namespace
        Options as produced by ``parse_args``.

    Raises
    ------
    ValueError
        If ``args.backbone`` or ``args.opt`` names an unsupported choice.
    """
    logging.basicConfig(
        format="[%(asctime)s] [p%(process)s] [%(pathname)s:%(lineno)d] [%(levelname)s] %(message)s",
        level=logging.INFO,
        handlers=[
            logging.FileHandler(args.log_file, mode="w"),
            logging.StreamHandler(),
        ],
    )

    # print the information of arguments
    for arg in vars(args):
        logging.info(arg + ": " + str(getattr(args, arg)))

    # for 106 landmarks
    num_points = 106
    # list of device ids, and the number of workers for data loading
    device_ids = [int(dev) for dev in args.dev_id.split(",")]
    dev_num = len(device_ids)
    num_workers = 4 * dev_num

    # random seed
    manual_seed = 1
    np.random.seed(manual_seed)
    torch.manual_seed(manual_seed)
    torch.cuda.manual_seed_all(manual_seed)

    if args.backbone == "supernet":
        # import supernet for block-wise DNAS pre-training
        from lib.supernet import PFLDInference, AuxiliaryNet
    elif args.backbone == "subnet":
        # import subnet for fine-tuning
        from lib.subnet import PFLDInference, AuxiliaryNet
    else:
        raise ValueError("backbone is not implemented")

    # the configuration for training control
    nas_config = NASConfig(
        arch_search=args.arch_search,
        model_dir=args.snapshot,
        nas_lr=args.theta_lr,
        mode=args.mode,
        alpha=args.alpha,
        search_space=search_space,
    )
    # look-up table with basic information of search space, flops per block, etc.
    lookup_table = LookUpTable(config=nas_config)
    # the auxiliary-net of PFLD to predict the pose angle
    auxiliarynet = AuxiliaryNet()

    if "sub" in args.backbone:
        check = torch.load(args.supernet, map_location=torch.device("cpu"))
        sampled_arch = check["arch_sample"]
        logging.info(sampled_arch)
        # create subnet
        pfld_backbone = PFLDInference(lookup_table, sampled_arch, num_points)

        # pre-load the weights from pre-trained supernet
        state_dict = check["pfld_backbone"]
        supernet_sample(pfld_backbone, state_dict, sampled_arch, lookup_table)
    else:
        # create supernet
        pfld_backbone = PFLDInference(lookup_table, num_points)

    # main task loss
    criterion = PFLDLoss()

    # optimizer for weight train
    param_groups = [
        {"params": pfld_backbone.parameters()},
        {"params": auxiliarynet.parameters()},
    ]
    if args.opt == 'adam':
        optimizer = torch.optim.AdamW(
            param_groups,
            lr=args.base_lr,
            weight_decay=args.weight_decay,
        )
    elif args.opt == 'rms':
        optimizer = torch.optim.RMSprop(
            param_groups,
            lr=args.base_lr,
            momentum=0.0,
            weight_decay=args.weight_decay,
        )
    else:
        # Without this branch ``optimizer`` would be unbound further down.
        raise ValueError("optimizer is not implemented: {}".format(args.opt))

    # data augmentation and dataloader
    transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
    # the landmark dataset with 106 points is used by default
    train_dataset = PFLDDatasets(
        os.path.join(args.data_root, 'train_data/list.txt'),
        transform,
        data_root=args.data_root,
        img_size=args.img_size,
    )
    dataloader = DataLoader(
        train_dataset,
        batch_size=args.train_batchsize,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=False,
    )

    val_dataset = PFLDDatasets(
        os.path.join(args.data_root, 'test_data/list.txt'),
        transform,
        data_root=args.data_root,
        img_size=args.img_size,
    )
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=args.val_batchsize,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
    )

    # create the trainer, then search/finetune
    trainer = FBNetTrainer(
        pfld_backbone,
        auxiliarynet,
        optimizer,
        criterion,
        device,
        device_ids,
        nas_config,
        lookup_table,
        dataloader,
        val_dataloader,
        n_epochs=args.end_epoch,
        logger=logging,
    )
    trainer.train()


def parse_args(argv=None):
    """Parse the command line and create the snapshot directory.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse; ``None`` means ``sys.argv[1:]``.

    Returns
    -------
    argparse.Namespace
        Parsed options. ``snapshot`` has the backbone name appended and
        ``log_file`` is replaced by ``train_<backbone>.log`` inside it.
    """
    parser = argparse.ArgumentParser(description="FBNet for PFLD")
    parser.add_argument("--backbone", default="supernet", type=str, choices=['supernet', 'subnet'])
    parser.add_argument("--dev_id", dest="dev_id", default="0", type=str)
    parser.add_argument("--opt", default="rms", type=str, choices=['adam', 'rms'])
    # A learning rate is fractional; type=int rejected every value given on
    # the command line.
    parser.add_argument("--base_lr", default=0.0001, type=float)
    parser.add_argument("--weight-decay", "--wd", default=1e-6, type=float)
    parser.add_argument("--img_size", default=112, type=int)
    parser.add_argument("--theta-lr", "--tlr", default=0.01, type=float)
    parser.add_argument("--mode", default="mul", type=str, choices=['mul', 'add'])
    parser.add_argument("--alpha", default=0.18, type=float)
    parser.add_argument("--supernet", default="", type=str, metavar="PATH")
    parser.add_argument("--end_epoch", default=300, type=int)
    parser.add_argument("--snapshot", default="models", type=str, metavar="PATH")
    parser.add_argument("--log_file", default="train.log", type=str)
    parser.add_argument("--data_root", default="/dataset", type=str, metavar="PATH")
    parser.add_argument("--train_batchsize", default=256, type=int)
    parser.add_argument("--val_batchsize", default=128, type=int)
    parser.add_argument("--arch-search", "-as", action="store_true")
    args = parser.parse_args(argv)
    args.snapshot = os.path.join(args.snapshot, args.backbone)
    # NOTE(review): whatever was passed as --log_file is overwritten here.
    args.log_file = os.path.join(args.snapshot, "train_{}.log".format(args.backbone))
    os.makedirs(args.snapshot, exist_ok=True)
    return args


if __name__ == "__main__":
    args = parse_args()
    main(args)
class MixedOp(nn.Module):
    """
    This class is to instantiate and manage info of one LayerChoice.
    It includes architecture weights and member functions operating the weights.
    """

    def __init__(self, mutable, latency):
        """
        Parameters
        ----------
        mutable : LayerChoice
            A LayerChoice in user model
        latency : List
            performance cost for each op in mutable
        """
        super(MixedOp, self).__init__()
        self.latency = latency
        self.n_choices = len(mutable)
        # Architecture weights start out uniform over the candidates.
        self.path_alpha = nn.Parameter(
            torch.FloatTensor([1.0 / self.n_choices for _ in range(self.n_choices)])
        )

    def get_path_alpha(self):
        """Return the architecture weight parameter of this layer."""
        return self.path_alpha

    def to_requires_grad(self):
        """Enable gradients for the architecture weights."""
        self.path_alpha.requires_grad = True

    def to_disable_grad(self):
        """Disable gradients for the architecture weights."""
        self.path_alpha.requires_grad = False

    def forward(self, mutable, x, temperature, perf_cost):
        """
        Define forward of LayerChoice.

        Parameters
        ----------
        mutable : LayerChoice
            this layer's mutable
        x : tensor
            inputs of this layer, only support one input
        temperature : float32
            the temperature for gumbel softmax
        perf_cost : tensor
            accumulated performance cost

        Returns
        -------
        output: tensor
            output of this layer
        perf_cost : tensor
            accumulated performance cost
        """
        candidate_ops = list(mutable)
        soft_mask_thetas = self.probs_over_ops(temperature)
        # Blend every candidate's output, and its cost, by the sampled weights.
        output = sum(m * op(x) for m, op in zip(soft_mask_thetas, candidate_ops))
        layer_perf = sum(
            m * lat for m, lat in zip(soft_mask_thetas, self.latency)
        )
        perf_cost = perf_cost + layer_perf

        return output, perf_cost

    def probs_over_ops(self, temperature):
        """
        Apply gumbel softmax on alpha to generate probability distribution

        Parameters
        ----------
        temperature : float32
            the temperature for gumbel softmax

        Returns
        -------
        pytorch tensor
            probability distribution
        """
        return F.gumbel_softmax(self.path_alpha, temperature)

    @property
    def chosen_index(self):
        """
        choose the op with the largest architecture weight

        Returns
        -------
        int
            index of the chosen one
        """
        alphas = self.path_alpha.data.detach().cpu().numpy()
        return int(np.argmax(alphas))


class FBNetMutator(BaseMutator):
    """
    This mutator initializes and operates all the LayerChoices of the input model.
    It is for the corresponding trainer to control the training process of LayerChoices,
    coordinating with whole training process.
    """

    def __init__(self, model, lookup_table):
        """
        Init a MixedOp instance for each mutable i.e., LayerChoice.
        And register the instantiated MixedOp in corresponding LayerChoice.
        If does not register it in LayerChoice, DataParallel does not work then,
        because architecture weights are not included in the DataParallel model.
        When MixedOPs are registered, we use ```requires_grad``` to control
        whether calculate gradients of architecture weights.

        Parameters
        ----------
        model : pytorch model
            The model that users want to tune, it includes search space defined with nni nas apis
        lookup_table : class
            lookup table object; its ``layer_num``, ``lut_ops`` and ``lut_perf``
            attributes are read here
        """
        super(FBNetMutator, self).__init__(model)
        self.mutable_list = []

        # Collect the op names of the candidate ops within each mutable.
        # Layers are numbered consecutively across stages, so the offset has
        # to advance by the size of every stage already handled.
        ops_names_mutable = dict()
        left = 0
        for stage_name in lookup_table.layer_num:
            n_layers = lookup_table.layer_num[stage_name]
            ops_names = list(lookup_table.lut_ops[stage_name])

            for i in range(left, left + n_layers):
                ops_names_mutable[i] = ops_names
            left += n_layers

        # Create the mixed op
        for i, mutable in enumerate(self.undedup_mutables):
            ops_names = ops_names_mutable[i]
            latency_mutable = lookup_table.lut_perf[i]
            latency = [latency_mutable[op_name] for op_name in ops_names]
            self.mutable_list.append(mutable)
            mutable.registered_module = MixedOp(mutable, latency)

    def on_forward_layer_choice(self, mutable, *args, **kwargs):
        """
        Callback of layer choice forward. This function defines the forward
        logic of the input mutable. So mutable is only interface, its real
        implementation is defined in mutator.

        Parameters
        ----------
        mutable: LayerChoice
            forward logic of this input mutable
        args: list of torch.Tensor
            inputs of this mutable
        kwargs: dict
            inputs of this mutable

        Returns
        -------
        tuple
            result of the registered MixedOp, i.e. (output, perf_cost)
        int
            index of the chosen op
        """
        # FIXME: return mask, to be consistent with other algorithms
        idx = mutable.registered_module.chosen_index
        return mutable.registered_module(mutable, *args, **kwargs), idx

    def num_arch_params(self):
        """
        The number of mutables, i.e., LayerChoice

        Returns
        -------
        int
            the number of LayerChoice in user model
        """
        return len(self.mutable_list)

    def get_architecture_parameters(self):
        """
        Get all the architecture parameters.

        yield
        -----
        PyTorch Parameter
            Return path_alpha of the traversed mutable
        """
        for mutable in self.undedup_mutables:
            yield mutable.registered_module.get_path_alpha()

    def arch_requires_grad(self):
        """
        Make architecture weights require gradient
        """
        for mutable in self.undedup_mutables:
            mutable.registered_module.to_requires_grad()

    def arch_disable_grad(self):
        """
        Disable gradient of architecture weights, i.e., does not
        calculate gradient for them.
        """
        for mutable in self.undedup_mutables:
            mutable.registered_module.to_disable_grad()

    def sample_final(self):
        """
        Generate the final chosen architecture.

        Returns
        -------
        dict
            the choice of each mutable, i.e., LayerChoice, as a boolean
            one-hot tensor keyed by the mutable's key
        """
        result = dict()
        for mutable in self.undedup_mutables:
            assert isinstance(mutable, LayerChoice)
            index = mutable.registered_module.chosen_index
            # pylint: disable=not-callable
            result[mutable.key] = F.one_hot(torch.tensor(index), num_classes=len(mutable)).view(-1).bool()
        return result
def _squeeze_keep_batch(tensor):
    """Drop singleton dimensions of ``tensor`` but keep the leading batch axis."""
    batch = tensor.size(0)
    squeezed = tensor.squeeze()
    if batch == 1:
        # squeeze() also removed the batch axis of a one-sample batch.
        squeezed = squeezed.unsqueeze(0)
    return squeezed


class RegularizerLoss(nn.Module):
    """Auxiliary loss for hardware-aware NAS."""

    def __init__(self, config):
        """
        Parameters
        ----------
        config : class
            configuration object; ``mode``, ``alpha`` and ``beta`` are read
        """
        super(RegularizerLoss, self).__init__()
        self.mode = config.mode
        self.alpha = config.alpha
        self.beta = config.beta

    def forward(self, perf_cost, batch_size=1):
        """
        Parameters
        ----------
        perf_cost : tensor
            accumulated performance cost
        batch_size : int
            divisor applied to ``perf_cost``

        Returns
        -------
        tensor or None
            ``alpha * log((perf_cost / batch_size) ** beta)`` for mode 'mul',
            ``alpha * (perf_cost / batch_size) ** beta`` for mode 'add',
            None for any other mode
        """
        scaled = (perf_cost / batch_size) ** self.beta
        if self.mode == 'mul':
            return self.alpha * torch.log(scaled)
        if self.mode == 'add':
            return self.alpha * scaled
        return None


class FBNetTrainer(BaseTrainer):
    def __init__(
        self,
        model,
        auxiliarynet,
        model_optim,
        criterion,
        device,
        device_ids,
        config,
        lookup_table,
        train_loader,
        valid_loader,
        n_epochs=300,
        load_ckpt=False,
        arch_path=None,
        logger=None,
    ):
        """
        Parameters
        ----------
        model : pytorch model
            the user model, which has mutables
        auxiliarynet : pytorch model
            the auxiliarynet to regress angle
        model_optim : pytorch optimizer
            the user defined optimizer
        criterion : pytorch loss
            the main task loss
        device : pytorch device
            the devices to train/search the model
        device_ids : list of int
            the indexes of devices used for training
        config : class
            configuration object for fbnet training
        lookup_table : class
            lookup table object for fbnet training
        train_loader : pytorch data loader
            data loader for the training set
        valid_loader : pytorch data loader
            data loader for the validation set
        n_epochs : int
            number of epochs to train/search
        load_ckpt : bool
            whether load checkpoint
        arch_path : str
            the path to store chosen architecture
        logger : logger
            the logger; a module-level ``logging`` logger is used when None
        """
        if logger is None:
            # Every training step logs, so a missing logger must not crash.
            import logging

            logger = logging.getLogger(__name__)

        self.model = model
        self.auxiliarynet = auxiliarynet
        self.model_optim = model_optim
        self.train_loader = train_loader
        self.valid_loader = valid_loader
        self.device = device
        self.dev_num = len(device_ids)
        self.n_epochs = n_epochs
        self.config = config
        self.lookup_table = lookup_table
        self.arch_search = config.arch_search
        self.start_epoch = config.start_epoch
        self.temperature = config.init_temperature
        self.exp_anneal_rate = config.exp_anneal_rate
        self.mode = config.mode

        self.load_ckpt = load_ckpt
        self.arch_path = arch_path
        self.logger = logger

        # scheduler of learning rate
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            model_optim, T_max=n_epochs, last_epoch=-1
        )

        if self.arch_search:
            # init mutator
            self.mutator = FBNetMutator(model, lookup_table)

        # DataParallel should be put behind the init of mutator
        self.model = torch.nn.DataParallel(
            self.model, device_ids=device_ids
        ).to(device)
        self.auxiliarynet = torch.nn.DataParallel(
            self.auxiliarynet, device_ids=device_ids
        ).to(device)

        if self.arch_search:
            # build architecture optimizer
            self.arch_optimizer = torch.optim.AdamW(
                self.mutator.get_architecture_parameters(),
                config.nas_lr,
                weight_decay=config.nas_weight_decay,
            )
            self.reg_loss = RegularizerLoss(config=config)

        self.criterion = criterion
        self.epoch = 0

    def _layer_choice_sample(self):
        """
        sample the index of network within layer choice

        Returns
        -------
        list
            name of the op with the largest architecture weight, per layer
        """
        layer_num = self.lookup_table.layer_num

        # get the choice idx in each layer
        choice_ids = list()
        for layer_id, param in enumerate(self.mutator.get_architecture_parameters()):
            param_np = param.detach().cpu().numpy()
            op_idx = np.argmax(param_np)
            choice_ids.append(op_idx)
            self.logger.info("layer {}: {}, index: {}".format(layer_id, param_np, op_idx))

        # get the arch_sample
        choice_names = list()
        layer_id = 0
        for stage_name in layer_num:
            ops_names = list(self.lookup_table.lut_ops[stage_name])
            for _ in range(layer_num[stage_name]):
                choice_names.append(ops_names[choice_ids[layer_id]])
                layer_id += 1

        self.logger.info(choice_names)
        return choice_names

    def _validate(self):
        """
        Do validation on ``valid_loader`` under eval mode.

        Returns
        -------
        float, float
            average loss, average nme (value returned by ``accuracy``)
        """

        # test on validation set under eval mode
        self.model.eval()
        self.auxiliarynet.eval()

        losses, nme = list(), list()
        batch_time = AverageMeter('batch_time')
        end = time.time()
        with torch.no_grad():
            for img, landmark_gt, _ in self.valid_loader:
                img = img.to(self.device, non_blocking=True)
                landmark_gt = landmark_gt.to(self.device, non_blocking=True)

                if self.arch_search:
                    perf_cost = torch.zeros(self.dev_num, 1).to(
                        self.device, non_blocking=True
                    )
                    landmark, _, _ = self.model(img, self.temperature, perf_cost)
                else:
                    landmark, _ = self.model(img)

                # A plain squeeze() would also drop a batch axis of size one.
                landmark = _squeeze_keep_batch(landmark)
                loss = torch.mean(torch.sum((landmark_gt - landmark) ** 2, axis=1))

                landmark = landmark.cpu().numpy().reshape(landmark.shape[0], -1, 2)
                landmark_gt = landmark_gt.cpu().numpy().reshape(landmark_gt.shape[0], -1, 2)
                _, nme_i = accuracy(landmark, landmark_gt)
                losses.append(loss.cpu().numpy())
                nme.extend(nme_i)

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

        self.logger.info("===> Evaluate:")
        self.logger.info("Eval set: Average loss: {:.4f} nme: {:.4f}".format(
            np.mean(losses), np.mean(nme)
            )
        )
        return np.mean(losses), np.mean(nme)

    def _train_epoch(self, epoch, optimizer, data_loader, arch_train=False):
        """
        Train one epoch.

        Parameters
        ----------
        epoch : int
            zero-based epoch index, only used for logging
        optimizer : pytorch optimizer
            the optimizer stepped after every batch
        data_loader : pytorch data loader
            source of (img, landmark_gt, euler_angle_gt) batches
        arch_train : bool
            when True the second value returned by the criterion is
            optimised instead of the first one
        """
        # switch to train mode
        self.model.train()
        self.auxiliarynet.train()

        batch_time = AverageMeter('batch_time')
        data_time = AverageMeter('data_time')
        losses = AverageMeter('losses')

        end = time.time()
        for i, (img, landmark_gt, euler_angle_gt) in enumerate(data_loader):
            data_time.update(time.time() - end)
            img = img.to(self.device, non_blocking=True)
            landmark_gt = landmark_gt.to(self.device, non_blocking=True)
            euler_angle_gt = euler_angle_gt.to(self.device, non_blocking=True)

            if self.arch_search:
                perf_cost = torch.zeros(self.dev_num, 1, requires_grad=True).to(
                    self.device, non_blocking=True
                )
                landmarks, features, perf_cost = self.model(img, self.temperature, perf_cost)
            else:
                landmarks, features = self.model(img)
            # A plain squeeze() would also drop a batch axis of size one.
            landmarks = _squeeze_keep_batch(landmarks)
            angle = self.auxiliarynet(features)

            # task loss
            weighted_loss, l2_loss = self.criterion(
                landmark_gt, euler_angle_gt, angle, landmarks
            )
            loss = l2_loss if arch_train else weighted_loss

            if self.arch_search:
                # hardware-aware loss
                regu_loss = self.reg_loss(perf_cost.mean(dim=0))
                if self.mode == 'mul':
                    loss = loss * regu_loss
                elif self.mode == 'add':
                    loss = loss + regu_loss

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            # measure accuracy and record loss
            losses.update(loss.item(), img.size(0))

            if i % 10 == 0:
                batch_log = 'Train [{0}][{1}]\t' \
                            'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                            'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' \
                            'Loss {losses.val:.4f} ({losses.avg:.4f})'. \
                    format(epoch + 1, i, batch_time=batch_time, data_time=data_time,
                           losses=losses)
                self.logger.info(batch_log)

    def _warm_up(self):
        """
        Warm up the model, during warm up, architecture weights are not trained.
        """
        for epoch in range(self.epoch, self.start_epoch):
            self.logger.info('\n--------Warmup epoch: %d--------\n', epoch + 1)
            self._train_epoch(epoch, self.model_optim, self.train_loader)
            # adjust learning rate
            self.scheduler.step()

            # validation
            self._validate()
            if epoch % 10 == 0:
                filename = os.path.join(self.config.model_dir, "checkpoint_%s.pth" % epoch)
                self.save_checkpoint(epoch, filename)

    def _train(self):
        """
        Train the model weights and, when ``arch_search`` is set, the
        architecture weights. Each epoch first updates the model weights on
        the training set, then the architecture weights on the validation
        set, validates, anneals the temperature, and saves a checkpoint every
        tenth epoch and whenever the validation nme improves.
        """
        if self.arch_search:
            arch_param_num = self.mutator.num_arch_params()
            self.logger.info('#arch_params: {}'.format(arch_param_num))
            self.epoch = max(self.start_epoch, self.epoch)
        val_nme = 1e6

        for epoch in range(self.epoch, self.n_epochs):
            self.logger.info('\n--------Train epoch: %d--------\n', epoch + 1)
            # update the weight parameters
            self._train_epoch(epoch, self.model_optim, self.train_loader)
            # adjust learning rate
            self.scheduler.step()

            if self.arch_search:
                self.logger.info("Update architecute parameters")
                # update the architecture parameters
                self._train_epoch(epoch, self.arch_optimizer, self.valid_loader, True)

            # validate
            _, nme = self._validate()

            # temperature annealing
            self.temperature = self.temperature * self.exp_anneal_rate
            # sub-network sampling
            choice_names = self._layer_choice_sample() if self.arch_search else None

            if epoch % 10 == 0:
                filename = os.path.join(self.config.model_dir, "checkpoint_%s.pth" % epoch)
                self.save_checkpoint(epoch, filename, choice_names=choice_names)
            if nme < val_nme:
                filename = os.path.join(self.config.model_dir, "checkpoint_min_nme.pth")
                self.save_checkpoint(epoch, filename, choice_names=choice_names)
                val_nme = nme
            self.logger.info("Best nme: {:.4f}".format(val_nme))

    def save_checkpoint(self, epoch, filename, choice_names=None):
        """
        Save backbone, auxiliary net and optimizer state, the epoch and the
        sampled architecture names to ``filename``. When ``arch_path`` is set
        and a mutator exists, the chosen architecture is exported there too.

        Parameters
        ----------
        epoch : int
            epoch index stored in the checkpoint
        filename : str
            destination file of the checkpoint
        choice_names : list, optional
            op names of the sampled architecture
        """
        state = {
            "pfld_backbone": self.model.state_dict(),
            "auxiliarynet": self.auxiliarynet.state_dict(),
            'optim': self.model_optim.state_dict(),
            'epoch': epoch,
            'arch_sample': choice_names,
        }
        torch.save(state, filename)
        self.logger.info("Save checkpoint to {0:}".format(filename))

        # The mutator only exists while searching; nothing to export otherwise.
        if self.arch_path and self.arch_search:
            self.export(self.arch_path)

    def load_checkpoint(self, filename):
        """
        Load epoch, model, auxiliary net and optimizer state from ``filename``.
        """
        # map_location lets a checkpoint saved on another device be restored.
        ckpt = torch.load(filename, map_location=self.device)
        self.epoch = ckpt['epoch']
        self.model.load_state_dict(ckpt['pfld_backbone'])
        self.auxiliarynet.load_state_dict(ckpt['auxiliarynet'])
        self.model_optim.load_state_dict(ckpt['optim'])

    def train(self):
        """
        Train the whole model: optionally resume from the best checkpoint,
        warm up when searching, then run the main training loop.
        """
        if self.load_ckpt:
            filename = os.path.join(self.config.model_dir, "checkpoint_min_nme.pth")
            if os.path.exists(filename):
                self.load_checkpoint(filename)

        if (self.epoch < self.start_epoch) and self.arch_search:
            self._warm_up()
        self._train()

    def export(self, file_name):
        """
        Export the chosen architecture into a file

        Parameters
        ----------
        file_name : str
            the file that stores exported chosen architecture
        """
        exported_arch = self.mutator.sample_final()
        with open(file_name, 'w') as f:
            json.dump(exported_arch, f, indent=2, sort_keys=True, cls=TorchTensorEncoder)

    def validate(self):
        raise NotImplementedError

    def checkpoint(self):
        raise NotImplementedError


def accuracy(preds, target):
    """Compute the normalised mean error of predicted landmarks.

    preds/target:: numpy array, shape is (N, L, 2)
        N: batchsize L: num of landmark

    The per-sample error is the summed point-wise Euclidean distance divided
    by ``normaliser * L``. The normaliser is the distance between two
    ground-truth points selected by L, or the constant 34 for L == 19.

    Returns
    -------
    float, numpy array
        mean error over the batch, and the float32 per-sample errors

    Raises
    ------
    ValueError
        If L is not one of 19, 29, 68, 98 or 106.
    """
    N = preds.shape[0]
    L = preds.shape[1]
    # Index pair of the two ground-truth points used for normalisation.
    reference_points = {
        29: (8, 9),     # cofw
        68: (36, 45),   # 300w
        98: (60, 72),
        106: (35, 93),  # left eye to right eye
    }
    rmse = np.zeros(N).astype(np.float32)

    for i in range(N):
        pts_pred, pts_gt = preds[i], target[i]
        if L == 19:  # aflw
            interocular = 34  # meta['box_size'][i]
        elif L in reference_points:
            first, second = reference_points[L]
            interocular = np.linalg.norm(pts_gt[first] - pts_gt[second])
        else:
            raise ValueError("Number of landmarks is wrong")
        rmse[i] = np.sum(np.linalg.norm(pts_pred - pts_gt, axis=1)) / (interocular * L)

    return np.mean(rmse), rmse