From cb15be491b991c2bdf495db44cb7f987bbbde36a Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Wed, 11 Dec 2019 19:13:59 +0800 Subject: [PATCH 1/4] Enable eslint for nni_manager (#1845) * enable eslint * remove tslint --- azure-pipelines.yml | 5 + src/nni_manager/common/component.ts | 1 - src/nni_manager/common/log.ts | 1 - src/nni_manager/common/restServer.ts | 1 - src/nni_manager/common/utils.ts | 1 - src/nni_manager/core/nniDataStore.ts | 1 - src/nni_manager/core/sqlDatabase.ts | 2 - src/nni_manager/core/test/dataStore.test.ts | 1 - .../core/test/ipcInterfaceTerminate.test.ts | 2 +- src/nni_manager/core/test/sqlDatabase.test.ts | 4 +- src/nni_manager/package.json | 2 - src/nni_manager/rest_server/restHandler.ts | 2 - .../rest_server/test/mockedNNIManager.ts | 1 - .../rest_server/test/restserver.test.ts | 4 - .../common/clusterJobRestServer.ts | 5 - .../training_service/common/util.ts | 1 - .../kubernetes/azureStorageClientUtils.ts | 4 - .../frameworkcontrollerApiClient.ts | 27 +++--- .../frameworkcontrollerConfig.ts | 2 - .../frameworkcontrollerJobInfoCollector.ts | 5 +- .../frameworkcontrollerTrainingService.ts | 5 - .../kubernetes/kubeflow/kubeflowApiClient.ts | 3 - .../kubernetes/kubeflow/kubeflowConfig.ts | 6 -- .../kubeflow/kubeflowJobInfoCollector.ts | 5 +- .../kubeflow/kubeflowTrainingService.ts | 4 - .../kubernetes/kubernetesApiClient.ts | 1 - .../kubernetes/kubernetesConfig.ts | 2 - .../kubernetes/kubernetesJobRestServer.ts | 1 - .../kubernetes/kubernetesTrainingService.ts | 7 +- .../training_service/local/gpuScheduler.ts | 1 - .../local/localTrainingService.ts | 7 +- .../training_service/pai/hdfsClientUtility.ts | 5 - .../training_service/pai/paiData.ts | 1 - .../pai/paiJobInfoCollector.ts | 4 - .../training_service/pai/paiJobRestServer.ts | 1 - .../pai/paiTrainingService.ts | 14 +-- .../remote_machine/gpuScheduler.ts | 2 - .../remote_machine/remoteMachineData.ts | 1 - .../remoteMachineJobRestServer.ts | 1 - 
.../remoteMachineTrainingService.ts | 9 +- .../remote_machine/sshClientUtility.ts | 2 - src/nni_manager/yarn.lock | 92 ++----------------- 42 files changed, 37 insertions(+), 209 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index f5916925d8..0c324ad5a8 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -17,6 +17,11 @@ jobs: - script: | source install.sh displayName: 'Install nni toolkit via source code' + - script: | + set -e + cd src/nni_manager + yarn eslint + displayName: 'Run eslint' - script: | python3 -m pip install torch==0.4.1 --user python3 -m pip install torchvision==0.2.1 --user diff --git a/src/nni_manager/common/component.ts b/src/nni_manager/common/component.ts index c50d9d751f..882e1b4e96 100644 --- a/src/nni_manager/common/component.ts +++ b/src/nni_manager/common/component.ts @@ -5,7 +5,6 @@ import * as ioc from 'typescript-ioc'; -// tslint:disable-next-line:no-any const Inject: (...args: any[]) => any = ioc.Inject; const Singleton: (target: Function) => void = ioc.Singleton; const Container = ioc.Container; diff --git a/src/nni_manager/common/log.ts b/src/nni_manager/common/log.ts index 540a560fdc..31a38d6e07 100644 --- a/src/nni_manager/common/log.ts +++ b/src/nni_manager/common/log.ts @@ -2,7 +2,6 @@ // Licensed under the MIT license. 
'use strict'; -/* tslint:disable:no-any */ import * as fs from 'fs'; import * as path from 'path'; diff --git a/src/nni_manager/common/restServer.ts b/src/nni_manager/common/restServer.ts index 9816a7ce19..368aff977c 100644 --- a/src/nni_manager/common/restServer.ts +++ b/src/nni_manager/common/restServer.ts @@ -34,7 +34,6 @@ export abstract class RestServer { } get endPoint(): string { - // tslint:disable-next-line:no-http-string return `http://${this.hostName}:${this.port}`; } diff --git a/src/nni_manager/common/utils.ts b/src/nni_manager/common/utils.ts index 0f56245409..bbe755f42f 100644 --- a/src/nni_manager/common/utils.ts +++ b/src/nni_manager/common/utils.ts @@ -116,7 +116,6 @@ function uniqueString(len: number): string { function randomSelect(a: T[]): T { assert(a !== undefined); - // tslint:disable-next-line:insecure-random return a[Math.floor(Math.random() * a.length)]; } function parseArg(names: string[]): string { diff --git a/src/nni_manager/core/nniDataStore.ts b/src/nni_manager/core/nniDataStore.ts index fe661f11aa..47134430d7 100644 --- a/src/nni_manager/core/nniDataStore.ts +++ b/src/nni_manager/core/nniDataStore.ts @@ -304,7 +304,6 @@ class NNIDataStore implements DataStore { } } - // tslint:disable-next-line:cyclomatic-complexity private getTrialJobsByReplayEvents(trialJobEvents: TrialJobEventRecord[]): Map { this.log.debug('getTrialJobsByReplayEvents begin'); diff --git a/src/nni_manager/core/sqlDatabase.ts b/src/nni_manager/core/sqlDatabase.ts index 24ae136337..125a1aff6d 100644 --- a/src/nni_manager/core/sqlDatabase.ts +++ b/src/nni_manager/core/sqlDatabase.ts @@ -20,7 +20,6 @@ import { getLogger, Logger } from '../common/log'; import { ExperimentProfile } from '../common/manager'; import { TrialJobDetail } from '../common/trainingService'; -/* tslint:disable:no-any */ const createTables: string = ` create table TrialJobEvent (timestamp integer, trialJobId text, event text, data text, logPath text, sequenceId integer); @@ -91,7 +90,6 @@ class 
SqlDB implements Database { this.log.debug(`Database directory: ${dbDir}`); assert(fs.existsSync(dbDir)); - // tslint:disable-next-line:no-bitwise const mode: number = createNew ? (sqlite3.OPEN_CREATE | sqlite3.OPEN_READWRITE) : sqlite3.OPEN_READWRITE; const dbFileName: string = path.join(dbDir, 'nni.sqlite'); diff --git a/src/nni_manager/core/test/dataStore.test.ts b/src/nni_manager/core/test/dataStore.test.ts index 6a4f4290d7..bc7e7a00c7 100644 --- a/src/nni_manager/core/test/dataStore.test.ts +++ b/src/nni_manager/core/test/dataStore.test.ts @@ -107,7 +107,6 @@ describe('Unit test for dataStore', () => { } ]; - // tslint:disable-next-line:no-any const metricsData: any = [ { trial_job_id: '111', diff --git a/src/nni_manager/core/test/ipcInterfaceTerminate.test.ts b/src/nni_manager/core/test/ipcInterfaceTerminate.test.ts index b85cdb3c20..b16294f31b 100644 --- a/src/nni_manager/core/test/ipcInterfaceTerminate.test.ts +++ b/src/nni_manager/core/test/ipcInterfaceTerminate.test.ts @@ -47,7 +47,7 @@ function startProcess(): void { // create IPC interface dispatcher = createDispatcherInterface(proc); (dispatcher).onCommand((commandType: string, content: string): void => { - console.log(commandType, content); // tslint:disable-line:no-console + console.log(commandType, content); }); } diff --git a/src/nni_manager/core/test/sqlDatabase.test.ts b/src/nni_manager/core/test/sqlDatabase.test.ts index a76f659ea3..a5522fba1f 100644 --- a/src/nni_manager/core/test/sqlDatabase.test.ts +++ b/src/nni_manager/core/test/sqlDatabase.test.ts @@ -70,19 +70,17 @@ const metrics: MetricDataRecord[] = [ { timestamp: Date.now(), trialJobId: 'C', parameterId: '2', type: 'FINAL', sequence: 0, data: 2.2 } // 5 ]; -// tslint:disable-next-line:no-any function assertRecordEqual(record: any, value: any): void { assert.ok(record.timestamp > new Date(2018, 6, 1).getTime()); assert.ok(record.timestamp < Date.now()); - for (const key in value) { // tslint:disable-line:no-for-in + for (const key in 
value) { if (key !== 'timestamp') { assert.equal(record[key], value[key]); } } } -// tslint:disable-next-line:no-any function assertRecordsEqual(records: any[], inputs: any[], indices: number[]): void { assert.equal(records.length, indices.length); for (let i: number = 0; i < records.length; i++) { diff --git a/src/nni_manager/package.json b/src/nni_manager/package.json index 3b60ad1e1e..c942029a45 100644 --- a/src/nni_manager/package.json +++ b/src/nni_manager/package.json @@ -54,8 +54,6 @@ "rmdir": "^1.2.0", "tmp": "^0.0.33", "ts-node": "^7.0.0", - "tslint": "^5.12.0", - "tslint-microsoft-contrib": "^6.0.0", "typescript": "^3.2.2" }, "resolutions": { diff --git a/src/nni_manager/rest_server/restHandler.ts b/src/nni_manager/rest_server/restHandler.ts index ac16715bcf..2d8494cb36 100644 --- a/src/nni_manager/rest_server/restHandler.ts +++ b/src/nni_manager/rest_server/restHandler.ts @@ -32,7 +32,6 @@ class NNIRestHandler { public createRestHandler(): Router { const router: Router = Router(); - // tslint:disable-next-line:typedef router.use((req: Request, res: Response, next) => { this.log.debug(`${req.method}: ${req.url}: body:\n${JSON.stringify(req.body, undefined, 4)}`); res.header('Access-Control-Allow-Origin', '*'); @@ -179,7 +178,6 @@ class NNIRestHandler { router.put( '/experiment/cluster-metadata', expressJoi(ValidationSchemas.SETCLUSTERMETADATA), async (req: Request, res: Response) => { - // tslint:disable-next-line:no-any const metadata: any = req.body; const keys: string[] = Object.keys(metadata); try { diff --git a/src/nni_manager/rest_server/test/mockedNNIManager.ts b/src/nni_manager/rest_server/test/mockedNNIManager.ts index 90484f922d..5c8bc267b7 100644 --- a/src/nni_manager/rest_server/test/mockedNNIManager.ts +++ b/src/nni_manager/rest_server/test/mockedNNIManager.ts @@ -66,7 +66,6 @@ export class MockedNNIManager extends Manager { startTime: Date.now(), endTime: Date.now(), tags: ['test'], - // tslint:disable-next-line:no-http-string url: 
'http://test', workingDirectory: '/tmp/mocked', form: { diff --git a/src/nni_manager/rest_server/test/restserver.test.ts b/src/nni_manager/rest_server/test/restserver.test.ts index 78191f2a97..36a601cb96 100644 --- a/src/nni_manager/rest_server/test/restserver.test.ts +++ b/src/nni_manager/rest_server/test/restserver.test.ts @@ -3,9 +3,7 @@ 'use strict'; -// tslint:disable-next-line:no-implicit-dependencies import { assert, expect } from 'chai'; -// tslint:disable-next-line:no-implicit-dependencies import * as request from 'request'; import { Container } from 'typescript-ioc'; @@ -54,7 +52,6 @@ describe('Unit test for rest server', () => { }); it('Test GET trial-jobs/:id', (done: Mocha.Done) => { - // tslint:disable-next-line:no-any request.get(`${ROOT_URL}/trial-jobs/1234`, (err: Error, res: request.Response, body: any) => { if (err) { assert.fail(err.message); @@ -88,7 +85,6 @@ describe('Unit test for rest server', () => { }); it('Test change concurrent-trial-jobs', (done: Mocha.Done) => { - // tslint:disable-next-line:no-any request.get(`${ROOT_URL}/experiment`, (err: Error, res: request.Response, body: any) => { if (err) { assert.fail(err.message); diff --git a/src/nni_manager/training_service/common/clusterJobRestServer.ts b/src/nni_manager/training_service/common/clusterJobRestServer.ts index 937d7a2155..49298744a3 100644 --- a/src/nni_manager/training_service/common/clusterJobRestServer.ts +++ b/src/nni_manager/training_service/common/clusterJobRestServer.ts @@ -4,7 +4,6 @@ 'use strict'; import * as assert from 'assert'; -// tslint:disable-next-line:no-implicit-dependencies import * as bodyParser from 'body-parser'; import { Request, Response, Router } from 'express'; import * as fs from 'fs'; @@ -71,10 +70,8 @@ export abstract class ClusterJobRestServer extends RestServer { } // Abstract method to handle trial metrics data - // tslint:disable-next-line:no-any protected abstract handleTrialMetrics(jobId: string, trialMetrics: any[]): void; - // 
tslint:disable: no-unsafe-any no-any protected createRestHandler(): Router { const router: Router = Router(); @@ -146,7 +143,6 @@ export abstract class ClusterJobRestServer extends RestServer { if (!skipLogging) { // Construct write stream to write remote trial's log into local file - // tslint:disable-next-line:non-literal-fs-path const writeStream: Writable = fs.createWriteStream(trialLogPath, { flags: 'a+', encoding: 'utf8', @@ -166,5 +162,4 @@ export abstract class ClusterJobRestServer extends RestServer { return router; } - // tslint:enable: no-unsafe-any no-any } diff --git a/src/nni_manager/training_service/common/util.ts b/src/nni_manager/training_service/common/util.ts index c1f181d952..0d578ac7e7 100644 --- a/src/nni_manager/training_service/common/util.ts +++ b/src/nni_manager/training_service/common/util.ts @@ -18,7 +18,6 @@ import { GPU_INFO_COLLECTOR_FORMAT_WINDOWS } from './gpuData'; * @param codeDir codeDir in nni config file * @returns file number under codeDir */ -// tslint:disable: no-redundant-jsdoc export async function validateCodeDir(codeDir: string): Promise { let fileCount: number | undefined; let fileNameValid: boolean = true; diff --git a/src/nni_manager/training_service/kubernetes/azureStorageClientUtils.ts b/src/nni_manager/training_service/kubernetes/azureStorageClientUtils.ts index 886c42f20d..d0858189f5 100644 --- a/src/nni_manager/training_service/kubernetes/azureStorageClientUtils.ts +++ b/src/nni_manager/training_service/kubernetes/azureStorageClientUtils.ts @@ -11,7 +11,6 @@ import { String } from 'typescript-string-operations'; import { getLogger } from '../../common/log'; import { mkDirP } from '../../common/utils'; -// tslint:disable: no-redundant-jsdoc no-any no-unsafe-any export namespace AzureStorageClientUtility { /** @@ -114,7 +113,6 @@ export namespace AzureStorageClientUtility { async function downloadFile(fileServerClient: any, azureDirectory: string, azureFileName: any, azureShare: any, localFilePath: string): Promise 
{ const deferred: Deferred = new Deferred(); - // tslint:disable-next-line:non-literal-fs-path await fileServerClient.getFileToStream(azureShare, azureDirectory, azureFileName, fs.createWriteStream(localFilePath), (error: any, result: any, response: any) => { if (error) { @@ -136,7 +134,6 @@ export namespace AzureStorageClientUtility { * @param azureShare : the azure share used * @param localDirectory : local directory to be uploaded */ - // tslint:disable:non-literal-fs-path export async function uploadDirectory(fileServerClient: azureStorage.FileService, azureDirectory: string, azureShare: any, localDirectory: string): Promise { const deferred: Deferred = new Deferred(); @@ -221,4 +218,3 @@ export namespace AzureStorageClientUtility { return deferred.promise; } } -// tslint:enable: no-redundant-jsdoc no-any no-unsafe-any diff --git a/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerApiClient.ts b/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerApiClient.ts index 4a891e8852..f6d37c7d77 100644 --- a/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerApiClient.ts +++ b/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerApiClient.ts @@ -6,19 +6,6 @@ import * as fs from 'fs'; import { GeneralK8sClient, KubernetesCRDClient } from '../kubernetesApiClient'; -/** - * FrameworkController Client - */ -class FrameworkControllerClientFactory { - /** - * Factory method to generate operator client - */ - // tslint:disable-next-line:function-name - public static createClient(): KubernetesCRDClient { - return new FrameworkControllerClientV1(); - } -} - /** * FrameworkController ClientV1 */ @@ -26,7 +13,6 @@ class FrameworkControllerClientV1 extends KubernetesCRDClient { /** * constructor, to initialize frameworkcontroller CRD definition */ - // tslint:disable: no-unsafe-any no-any public constructor() { super(); this.crdSchema = 
JSON.parse(fs.readFileSync('./config/frameworkcontroller/frameworkcontrollerjob-crd-v1.json', 'utf8')); @@ -36,11 +22,22 @@ class FrameworkControllerClientV1 extends KubernetesCRDClient { protected get operator(): any { return this.client.apis['frameworkcontroller.microsoft.com'].v1.namespaces('default').frameworks; } - // tslint:enable: no-unsafe-any no-any public get containerName(): string { return 'framework'; } } +/** + * FrameworkController Client + */ +class FrameworkControllerClientFactory { + /** + * Factory method to generate operator client + */ + public static createClient(): KubernetesCRDClient { + return new FrameworkControllerClientV1(); + } +} + export { FrameworkControllerClientFactory, GeneralK8sClient }; diff --git a/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerConfig.ts b/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerConfig.ts index 8677c8ed65..bac330feb9 100644 --- a/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerConfig.ts +++ b/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerConfig.ts @@ -9,7 +9,6 @@ import { AzureStorage, KeyVaultConfig, KubernetesClusterConfig, KubernetesCluste KubernetesStorageKind, KubernetesTrialConfig, KubernetesTrialConfigTemplate, NFSConfig, StorageConfig } from '../kubernetesConfig'; -// tslint:disable:completed-docs export class FrameworkAttemptCompletionPolicy { public readonly minFailedTaskCount: number; public readonly minSucceededTaskCount: number; @@ -54,7 +53,6 @@ export class FrameworkControllerClusterConfig extends KubernetesClusterConfig { } } -// tslint:disable:function-name export class FrameworkControllerClusterConfigNFS extends KubernetesClusterConfigNFS { public readonly serviceAccountName: string; constructor( diff --git a/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerJobInfoCollector.ts 
b/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerJobInfoCollector.ts index 0f4b92f142..c3e453ee35 100644 --- a/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerJobInfoCollector.ts +++ b/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerJobInfoCollector.ts @@ -26,7 +26,6 @@ export class FrameworkControllerJobInfoCollector extends KubernetesJobInfoCollec return Promise.reject('kubernetesCRDClient is undefined'); } - // tslint:disable-next-line:no-any let kubernetesJobInfo: any; try { kubernetesJobInfo = await kubernetesCRDClient.getKubernetesJob(kubernetesTrialJob.kubernetesJobName); @@ -37,9 +36,9 @@ export class FrameworkControllerJobInfoCollector extends KubernetesJobInfoCollec return Promise.resolve(); } - // tslint:disable: no-unsafe-any if (kubernetesJobInfo.status && kubernetesJobInfo.status.state) { const frameworkJobType: FrameworkControllerJobStatus = kubernetesJobInfo.status.state; + /* eslint-disable require-atomic-updates */ switch (frameworkJobType) { case 'AttemptCreationPending': case 'AttemptCreationRequested': @@ -69,9 +68,9 @@ export class FrameworkControllerJobInfoCollector extends KubernetesJobInfoCollec } default: } + /* eslint-enable require-atomic-updates */ } return Promise.resolve(); } - // tslint:enable: no-unsafe-any } diff --git a/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService.ts b/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService.ts index 6b2e93ca89..87e8ad6c7a 100644 --- a/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService.ts +++ b/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService.ts @@ -101,7 +101,6 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple this.trialJobsMap.set(trialJobId, trialJobDetail); // 
Create frameworkcontroller job based on generated frameworkcontroller job resource config - // tslint:disable-next-line:no-any const frameworkcontrollerJobConfig: any = await this.prepareFrameworkControllerConfig( trialJobId, trialWorkingFolder, frameworkcontrollerJobName); await this.kubernetesCRDClient.createKubernetesJob(frameworkcontrollerJobConfig); @@ -112,7 +111,6 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple return Promise.resolve(trialJobDetail); } - // tslint:disable:no-redundant-jsdoc no-any no-unsafe-any public async setClusterMetadata(key: string, value: string): Promise { switch (key) { case TrialConfigMetadataKey.NNI_MANAGER_IP: @@ -171,7 +169,6 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple return Promise.resolve(); } - // tslint:enable: no-any no-unsafe-any /** * upload code files to nfs or azureStroage @@ -256,7 +253,6 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple } } - // tslint:disable: no-any no-unsafe-any private async prepareFrameworkControllerConfig(trialJobId: string, trialWorkingFolder: string, frameworkcontrollerJobName: string): Promise { @@ -447,7 +443,6 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple } }; } - // tslint:enable: no-any no-unsafe-any } export { FrameworkControllerTrainingService }; diff --git a/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowApiClient.ts b/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowApiClient.ts index dacf2229d0..60208aa24a 100644 --- a/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowApiClient.ts +++ b/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowApiClient.ts @@ -8,7 +8,6 @@ import { GeneralK8sClient, KubernetesCRDClient } from '../kubernetesApiClient'; import { KubeflowOperator } from './kubeflowConfig'; -// tslint:disable: no-unsafe-any no-any completed-docs class TFOperatorClientV1Alpha2 extends 
KubernetesCRDClient { /** * constructor, to initialize tfjob CRD definition @@ -130,7 +129,6 @@ class KubeflowOperatorClientFactory { /** * Factory method to generate operator client */ - // tslint:disable-next-line:function-name public static createClient(kubeflowOperator: KubeflowOperator, operatorApiVersion: string): KubernetesCRDClient { switch (kubeflowOperator) { case 'tf-operator': { @@ -169,5 +167,4 @@ class KubeflowOperatorClientFactory { } } -// tslint:enable: no-unsafe-any export { KubeflowOperatorClientFactory, GeneralK8sClient }; diff --git a/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowConfig.ts b/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowConfig.ts index a24b048487..6aea0bb879 100644 --- a/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowConfig.ts +++ b/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowConfig.ts @@ -26,7 +26,6 @@ export class KubeflowClusterConfig extends KubernetesClusterConfig { } } -// tslint:disable:completed-docs export class KubeflowClusterConfigNFS extends KubernetesClusterConfigNFS { public readonly operator: KubeflowOperator; constructor( @@ -43,7 +42,6 @@ export class KubeflowClusterConfigNFS extends KubernetesClusterConfigNFS { return 'nfs'; } - // tslint:disable-next-line:function-name public static getInstance(jsonObject: object): KubeflowClusterConfigNFS { const kubeflowClusterConfigObjectNFS: KubeflowClusterConfigNFS = jsonObject; assert (kubeflowClusterConfigObjectNFS !== undefined); @@ -75,7 +73,6 @@ export class KubeflowClusterConfigAzure extends KubernetesClusterConfigAzure { return 'azureStorage'; } - // tslint:disable-next-line:function-name public static getInstance(jsonObject: object): KubeflowClusterConfigAzure { const kubeflowClusterConfigObjectAzure: KubeflowClusterConfigAzure = jsonObject; @@ -91,7 +88,6 @@ export class KubeflowClusterConfigAzure extends KubernetesClusterConfigAzure { export class KubeflowClusterConfigFactory { - // 
tslint:disable-next-line:function-name public static generateKubeflowClusterConfig(jsonObject: object): KubeflowClusterConfig { const storageConfig: StorageConfig = jsonObject; if (storageConfig === undefined) { @@ -156,8 +152,6 @@ export class KubeflowTrialConfigPytorch extends KubeflowTrialConfig { } export class KubeflowTrialConfigFactory { - - // tslint:disable-next-line:function-name public static generateKubeflowTrialConfig(jsonObject: object, operator: KubeflowOperator): KubeflowTrialConfig { if (operator === 'tf-operator') { const kubeflowTrialConfigObject: KubeflowTrialConfigTensorflow = jsonObject; diff --git a/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowJobInfoCollector.ts b/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowJobInfoCollector.ts index cfd06f615f..53b3e1a373 100644 --- a/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowJobInfoCollector.ts +++ b/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowJobInfoCollector.ts @@ -26,7 +26,6 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector { return Promise.reject('kubernetesCRDClient is undefined'); } - // tslint:disable:no-any no-unsafe-any let kubernetesJobInfo: any; try { kubernetesJobInfo = await kubernetesCRDClient.getKubernetesJob(kubernetesTrialJob.kubernetesJobName); @@ -37,7 +36,7 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector { //This is not treat as a error status return Promise.resolve(); } - + /* eslint-disable require-atomic-updates */ if (kubernetesJobInfo.status && kubernetesJobInfo.status.conditions) { const latestCondition: any = kubernetesJobInfo.status.conditions[kubernetesJobInfo.status.conditions.length - 1]; const tfJobType: KubeflowJobStatus = latestCondition.type; @@ -63,7 +62,7 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector { default: } } - // tslint:enable:no-any no-unsafe-any + /* eslint-enable require-atomic-updates */ return 
Promise.resolve(); } diff --git a/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowTrainingService.ts b/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowTrainingService.ts index 49ab61703e..98c84a30b0 100644 --- a/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowTrainingService.ts +++ b/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowTrainingService.ts @@ -27,7 +27,6 @@ import { KubeflowClusterConfig, KubeflowClusterConfigAzure, KubeflowClusterConfi import { KubeflowJobInfoCollector } from './kubeflowJobInfoCollector'; import { KubeflowJobRestServer } from './kubeflowJobRestServer'; -// tslint:disable: no-unsafe-any no-any /** * Training Service implementation for Kubeflow * Refer https://github.com/kubeflow/kubeflow for more info about Kubeflow @@ -108,7 +107,6 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber return Promise.resolve(trialJobDetail); } - // tslint:disable:no-redundant-jsdoc public async setClusterMetadata(key: string, value: string): Promise { switch (key) { case TrialConfigMetadataKey.NNI_MANAGER_IP: @@ -461,7 +459,6 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber replicas: replicaNumber, template: { metadata: { - // tslint:disable-next-line:no-null-keyword creationTimestamp: null }, spec: spec @@ -469,5 +466,4 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber } } } -// tslint:enable: no-unsafe-any no-any export { KubeflowTrainingService }; diff --git a/src/nni_manager/training_service/kubernetes/kubernetesApiClient.ts b/src/nni_manager/training_service/kubernetes/kubernetesApiClient.ts index ab637fc246..e34b468bb2 100644 --- a/src/nni_manager/training_service/kubernetes/kubernetesApiClient.ts +++ b/src/nni_manager/training_service/kubernetes/kubernetesApiClient.ts @@ -10,7 +10,6 @@ import { getLogger, Logger } from '../../common/log'; /** * Generict Kubernetes client, target version >= 1.9 */ -// 
tslint:disable: no-any no-unsafe-any class GeneralK8sClient { protected readonly client: any; protected readonly log: Logger = getLogger(); diff --git a/src/nni_manager/training_service/kubernetes/kubernetesConfig.ts b/src/nni_manager/training_service/kubernetes/kubernetesConfig.ts index 61ef4088cf..0963ec6438 100644 --- a/src/nni_manager/training_service/kubernetes/kubernetesConfig.ts +++ b/src/nni_manager/training_service/kubernetes/kubernetesConfig.ts @@ -6,7 +6,6 @@ export type KubernetesStorageKind = 'nfs' | 'azureStorage'; import { MethodNotImplementedError } from '../../common/errors'; -// tslint:disable: completed-docs function-name export abstract class KubernetesClusterConfig { public readonly storage?: KubernetesStorageKind; public readonly apiVersion: string; @@ -91,7 +90,6 @@ export class KubernetesClusterConfigAzure extends KubernetesClusterConfig { } } -// tslint:disable-next-line:no-unnecessary-class export class KubernetesClusterConfigFactory { public static generateKubernetesClusterConfig(jsonObject: object): KubernetesClusterConfig { diff --git a/src/nni_manager/training_service/kubernetes/kubernetesJobRestServer.ts b/src/nni_manager/training_service/kubernetes/kubernetesJobRestServer.ts index fc71aa5f51..12d2ca2427 100644 --- a/src/nni_manager/training_service/kubernetes/kubernetesJobRestServer.ts +++ b/src/nni_manager/training_service/kubernetes/kubernetesJobRestServer.ts @@ -25,7 +25,6 @@ export class KubernetesJobRestServer extends ClusterJobRestServer { this.kubernetesTrainingService = kubernetesTrainingService; } - // tslint:disable-next-line:no-any protected handleTrialMetrics(jobId: string, metrics: any[]): void { if (this.kubernetesTrainingService === undefined) { throw Error('kubernetesTrainingService not initialized!'); diff --git a/src/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts b/src/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts index 9ee3bed161..e13bd75d51 100644 --- 
a/src/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts +++ b/src/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts @@ -61,7 +61,6 @@ abstract class KubernetesTrainingService { this.logCollection = 'none'; } - // tslint:disable:no-any public generatePodResource(memory: number, cpuNum: number, gpuNum: number): any { const resources: any = { memory: `${memory}Mi`, @@ -73,7 +72,7 @@ abstract class KubernetesTrainingService { } return resources; - } // tslint:enable:no-any + } public async listTrialJobs(): Promise { const jobs: TrialJobDetail[] = []; @@ -197,7 +196,6 @@ abstract class KubernetesTrainingService { await this.kubernetesJobRestServer.stop(); this.log.info('Kubernetes Training service rest server stopped successfully.'); } catch (error) { - // tslint:disable-next-line: no-unsafe-any this.log.error(`Kubernetes Training service rest server stopped failed, error: ${error.message}`); return Promise.reject(error); @@ -206,7 +204,6 @@ abstract class KubernetesTrainingService { return Promise.resolve(); } - // tslint:disable: no-unsafe-any no-any protected async createAzureStorage(vaultName: string, valutKeyName: string): Promise { try { const result: any = await cpp.exec(`az keyvault secret show --name ${valutKeyName} --vault-name ${vaultName}`); @@ -253,7 +250,6 @@ abstract class KubernetesTrainingService { return Promise.resolve(); } - // tslint:enable: no-unsafe-any no-any /** * Genereate run script for different roles(like worker or ps) @@ -271,7 +267,6 @@ abstract class KubernetesTrainingService { if (gpuNum === 0) { nvidiaScript = 'export CUDA_VISIBLE_DEVICES='; } - // tslint:disable-next-line: strict-boolean-expressions const nniManagerIp: string = this.nniManagerIpConfig ? this.nniManagerIpConfig.nniManagerIp : getIPV4Address(); const version: string = this.versionCheck ? 
await getVersion() : ''; const runScript: string = String.Format( diff --git a/src/nni_manager/training_service/local/gpuScheduler.ts b/src/nni_manager/training_service/local/gpuScheduler.ts index 0b28d37cdb..e7c2816474 100644 --- a/src/nni_manager/training_service/local/gpuScheduler.ts +++ b/src/nni_manager/training_service/local/gpuScheduler.ts @@ -86,7 +86,6 @@ class GPUScheduler { runGpuMetricsCollector(this.gpuMetricCollectorScriptFolder); } - // tslint:disable:non-literal-fs-path private async updateGPUSummary(): Promise { const gpuMetricPath: string = path.join(this.gpuMetricCollectorScriptFolder, 'gpu_metrics'); if (fs.existsSync(gpuMetricPath)) { diff --git a/src/nni_manager/training_service/local/localTrainingService.ts b/src/nni_manager/training_service/local/localTrainingService.ts index fd87c5ffc5..848817c1ce 100644 --- a/src/nni_manager/training_service/local/localTrainingService.ts +++ b/src/nni_manager/training_service/local/localTrainingService.ts @@ -31,7 +31,6 @@ import { GPUScheduler } from './gpuScheduler'; * success: true if the buffer contains at least one complete command; otherwise false * remain: remaining data after the first command */ -// tslint:disable:newline-per-chained-call informative-docs function decodeCommand(data: Buffer): [boolean, string, string, Buffer] { if (data.length < 8) { return [false, '', '', data]; @@ -46,7 +45,6 @@ function decodeCommand(data: Buffer): [boolean, string, string, Buffer] { return [true, commandType, content, remain]; } -// tslint:enable:newline-per-chained-call informative-docs /** * LocalTrialJobDetail @@ -252,7 +250,6 @@ class LocalTrainingService implements TrainingService { public async setClusterMetadata(key: string, value: string): Promise { if (!this.initialized) { this.rootDir = getExperimentRootDir(); - // tslint:disable-next-line:non-literal-fs-path if (!fs.existsSync(this.rootDir)) { await cpp.exec(`powershell.exe mkdir ${this.rootDir}`); } @@ -524,8 +521,8 @@ class LocalTrainingService 
implements TrainingService { await this.writeParameterFile(trialJobDetail.workingDirectory, trialJobDetail.form.hyperParameters); const trialJobProcess: cp.ChildProcess = runScript(path.join(trialJobDetail.workingDirectory, scriptName)); this.setTrialJobStatus(trialJobDetail, 'RUNNING'); - trialJobDetail.startTime = Date.now(); - trialJobDetail.pid = trialJobProcess.pid; + trialJobDetail.startTime = Date.now(); // eslint-disable-line require-atomic-updates + trialJobDetail.pid = trialJobProcess.pid; // eslint-disable-line require-atomic-updates this.setExtraProperties(trialJobDetail, resource); let buffer: Buffer = Buffer.alloc(0); diff --git a/src/nni_manager/training_service/pai/hdfsClientUtility.ts b/src/nni_manager/training_service/pai/hdfsClientUtility.ts index 1bf6e008e5..876cd9e0ac 100644 --- a/src/nni_manager/training_service/pai/hdfsClientUtility.ts +++ b/src/nni_manager/training_service/pai/hdfsClientUtility.ts @@ -17,7 +17,6 @@ export namespace HDFSClientUtility { * @param hdfsUserName HDFS user name */ export function hdfsExpRootDir(hdfsUserName: string): string { - // tslint:disable-next-line:prefer-template return '/' + unixPathJoin(hdfsUserName, 'nni', 'experiments', getExperimentId()); } @@ -47,10 +46,8 @@ export namespace HDFSClientUtility { * @param hdfsFilePath hdfs file path(target) * @param hdfsClient hdfs client */ - // tslint:disable: no-unsafe-any non-literal-fs-path no-any export async function copyFileToHdfs(localFilePath: string, hdfsFilePath: string, hdfsClient: any): Promise { const deferred: Deferred = new Deferred(); - // tslint:disable-next-line:non-literal-fs-path fs.exists(localFilePath, (exists: boolean) => { // Detect if local file exist if (exists) { @@ -90,7 +87,6 @@ export namespace HDFSClientUtility { for (const fileName of fileNameArray) { const fullFilePath: string = path.join(localDirectory, fileName); try { - // tslint:disable-next-line:non-literal-fs-path if (fs.lstatSync(fullFilePath) .isFile()) { await 
copyFileToHdfs(fullFilePath, path.join(hdfsDirectory, fileName), hdfsClient); @@ -227,5 +223,4 @@ export namespace HDFSClientUtility { return deferred.promise; } - // tslint:enable: no-unsafe-any non-literal-fs-path no-any } diff --git a/src/nni_manager/training_service/pai/paiData.ts b/src/nni_manager/training_service/pai/paiData.ts index c2c332a8db..011c9ff318 100644 --- a/src/nni_manager/training_service/pai/paiData.ts +++ b/src/nni_manager/training_service/pai/paiData.ts @@ -52,6 +52,5 @@ export const PAI_TRIAL_COMMAND_FORMAT: string = --pai_hdfs_output_dir '{9}' --pai_hdfs_host '{10}' --pai_user_name {11} --nni_hdfs_exp_dir '{12}' --webhdfs_path '/webhdfs/api/v1' \ --nni_manager_version '{13}' --log_collection '{14}'`; -// tslint:disable:no-http-string export const PAI_LOG_PATH_FORMAT: string = `http://{0}/webhdfs/explorer.html#{1}`; diff --git a/src/nni_manager/training_service/pai/paiJobInfoCollector.ts b/src/nni_manager/training_service/pai/paiJobInfoCollector.ts index 8272070ca3..04a10a8ac1 100644 --- a/src/nni_manager/training_service/pai/paiJobInfoCollector.ts +++ b/src/nni_manager/training_service/pai/paiJobInfoCollector.ts @@ -3,7 +3,6 @@ 'use strict'; -// tslint:disable-next-line:no-implicit-dependencies import * as request from 'request'; import { Deferred } from 'ts-deferred'; import { NNIError, NNIErrorNames } from '../../common/errors'; @@ -54,7 +53,6 @@ export class PAIJobInfoCollector { // Rest call to get PAI job info and update status // Refer https://github.com/Microsoft/pai/blob/master/docs/rest-server/API.md for more detail about PAI Rest API const getJobInfoRequest: request.Options = { - // tslint:disable-next-line:no-http-string uri: `http://${paiClusterConfig.host}/rest-server/api/v1/user/${paiClusterConfig.userName}/jobs/${paiTrialJob.paiJobName}`, method: 'GET', json: true, @@ -64,7 +62,6 @@ export class PAIJobInfoCollector { } }; - // tslint:disable: no-unsafe-any no-any cyclomatic-complexity //TODO : pass in request timeout param? 
request(getJobInfoRequest, (error: Error, response: request.Response, body: any) => { if ((error !== undefined && error !== null) || response.statusCode >= 500) { @@ -128,5 +125,4 @@ export class PAIJobInfoCollector { return deferred.promise; } - // tslint:enable: no-unsafe-any no-any } diff --git a/src/nni_manager/training_service/pai/paiJobRestServer.ts b/src/nni_manager/training_service/pai/paiJobRestServer.ts index ee3afb6c7a..ca1fc070f5 100644 --- a/src/nni_manager/training_service/pai/paiJobRestServer.ts +++ b/src/nni_manager/training_service/pai/paiJobRestServer.ts @@ -34,7 +34,6 @@ export class PAIJobRestServer extends ClusterJobRestServer { this.paiTrainingService = component.get(PAITrainingService); } - // tslint:disable-next-line:no-any protected handleTrialMetrics(jobId: string, metrics: any[]): void { // Split metrics array into single metric, then emit // Warning: If not split metrics into single ones, the behavior will be UNKNOWN diff --git a/src/nni_manager/training_service/pai/paiTrainingService.ts b/src/nni_manager/training_service/pai/paiTrainingService.ts index 2b1dae21f3..d23ec35fa3 100644 --- a/src/nni_manager/training_service/pai/paiTrainingService.ts +++ b/src/nni_manager/training_service/pai/paiTrainingService.ts @@ -5,7 +5,6 @@ import * as fs from 'fs'; import * as path from 'path'; -// tslint:disable-next-line:no-implicit-dependencies import * as request from 'request'; import * as component from '../../common/component'; @@ -45,7 +44,6 @@ class PAITrainingService implements TrainingService { private paiClusterConfig?: PAIClusterConfig; private readonly jobQueue: string[]; private stopping: boolean = false; - // tslint:disable-next-line:no-any private hdfsClient: any; private paiToken? 
: string; private paiTokenUpdateTime?: number; @@ -171,7 +169,6 @@ class PAITrainingService implements TrainingService { return true; } - // tslint:disable:no-http-string public cancelTrialJob(trialJobId: string, isEarlyStopped: boolean = false): Promise { const trialJobDetail: PAITrialJobDetail | undefined = this.trialJobsMap.get(trialJobId); const deferred: Deferred = new Deferred(); @@ -203,7 +200,6 @@ class PAITrainingService implements TrainingService { // Set trialjobDetail's early stopped field, to mark the job's cancellation source trialJobDetail.isEarlyStopped = isEarlyStopped; - // tslint:disable-next-line:no-any request(stopJobRequest, (error: Error, response: request.Response, body: any) => { if ((error !== undefined && error !== null) || response.statusCode >= 400) { this.log.error(`PAI Training service: stop trial ${trialJobId} to PAI Cluster failed!`); @@ -217,8 +213,6 @@ class PAITrainingService implements TrainingService { return deferred.promise; } - // tslint:disable: no-unsafe-any no-any - // tslint:disable-next-line:max-func-body-length public async setClusterMetadata(key: string, value: string): Promise { const deferred: Deferred = new Deferred(); @@ -298,7 +292,6 @@ class PAITrainingService implements TrainingService { return deferred.promise; } - // tslint:enable: no-unsafe-any public getClusterMetadata(key: string): Promise { const deferred: Deferred = new Deferred(); @@ -319,7 +312,6 @@ class PAITrainingService implements TrainingService { deferred.resolve(); this.log.info('PAI Training service rest server stopped successfully.'); } catch (error) { - // tslint:disable-next-line: no-unsafe-any this.log.error(`PAI Training service rest server stopped failed, error: ${error.message}`); deferred.reject(error); } @@ -331,7 +323,6 @@ class PAITrainingService implements TrainingService { return this.metricsEmitter; } - // tslint:disable-next-line:max-func-body-length private async submitTrialJobToPAI(trialJobId: string): Promise { const deferred: 
Deferred = new Deferred(); const trialJobDetail: PAITrialJobDetail | undefined = this.trialJobsMap.get(trialJobId); @@ -383,7 +374,6 @@ class PAITrainingService implements TrainingService { } const hdfsCodeDir: string = HDFSClientUtility.getHdfsTrialWorkDir(this.paiClusterConfig.userName, trialJobId); const hdfsOutputDir: string = unixPathJoin(hdfsCodeDir, 'nnioutput'); - // tslint:disable-next-line: strict-boolean-expressions const nniManagerIp: string = this.nniManagerIpConfig ? this.nniManagerIpConfig.nniManagerIp : getIPV4Address(); const version: string = this.versionCheck ? await getVersion() : ''; const nniPaiTrialCommand: string = String.Format( @@ -407,7 +397,6 @@ class PAITrainingService implements TrainingService { ) .replace(/\r\n|\n|\r/gm, ''); - // tslint:disable-next-line:no-console this.log.info(`nniPAItrial command is ${nniPaiTrialCommand.trim()}`); const paiTaskRoles: PAITaskRole[] = [ new PAITaskRole( @@ -449,7 +438,7 @@ class PAITrainingService implements TrainingService { await HDFSClientUtility.copyDirectoryToHdfs(trialLocalTempFolder, hdfsCodeDir, this.hdfsClient); } catch (error) { this.log.error(`PAI Training service: copy ${this.paiTrialConfig.codeDir} to HDFS ${hdfsCodeDir} failed, error is ${error}`); - trialJobDetail.status = 'FAILED'; + trialJobDetail.status = 'FAILED'; // eslint-disable-line require-atomic-updates deferred.resolve(true); return deferred.promise; @@ -467,7 +456,6 @@ class PAITrainingService implements TrainingService { Authorization: `Bearer ${this.paiToken}` } }; - // tslint:disable:no-any no-unsafe-any request(submitJobRequest, (error: Error, response: request.Response, body: any) => { if ((error !== undefined && error !== null) || response.statusCode >= 400) { const errorMessage: string = (error !== undefined && error !== null) ? 
error.message : diff --git a/src/nni_manager/training_service/remote_machine/gpuScheduler.ts b/src/nni_manager/training_service/remote_machine/gpuScheduler.ts index 7903504887..5b8321c68f 100644 --- a/src/nni_manager/training_service/remote_machine/gpuScheduler.ts +++ b/src/nni_manager/training_service/remote_machine/gpuScheduler.ts @@ -148,7 +148,6 @@ export class GPUScheduler { } } this.log.debug(`designated gpu indices: ${designatedGpuIndices}`); - // tslint:disable: strict-boolean-expressions rmMeta.gpuSummary.gpuInfos.forEach((gpuInfo: GPUInfo) => { // if the GPU has active process, OR be reserved by a job, // or index not in gpuIndices configuration in machineList, @@ -174,7 +173,6 @@ export class GPUScheduler { return totalResourceMap; } - // tslint:enable: strict-boolean-expressions private selectMachine(rmMetas: RemoteMachineMeta[]): RemoteMachineMeta { assert(rmMetas !== undefined && rmMetas.length > 0); diff --git a/src/nni_manager/training_service/remote_machine/remoteMachineData.ts b/src/nni_manager/training_service/remote_machine/remoteMachineData.ts index d8c96958ac..1d6193750a 100644 --- a/src/nni_manager/training_service/remote_machine/remoteMachineData.ts +++ b/src/nni_manager/training_service/remote_machine/remoteMachineData.ts @@ -186,7 +186,6 @@ export class SSHClientManager { /** * Create a new ssh connection client and initialize it */ - // tslint:disable:non-literal-fs-path private initNewSSHClient(): Promise { const deferred: Deferred = new Deferred(); const conn: Client = new Client(); diff --git a/src/nni_manager/training_service/remote_machine/remoteMachineJobRestServer.ts b/src/nni_manager/training_service/remote_machine/remoteMachineJobRestServer.ts index 55878b4bb2..1f38b4656e 100644 --- a/src/nni_manager/training_service/remote_machine/remoteMachineJobRestServer.ts +++ b/src/nni_manager/training_service/remote_machine/remoteMachineJobRestServer.ts @@ -25,7 +25,6 @@ export class RemoteMachineJobRestServer extends ClusterJobRestServer 
{ this.remoteMachineTrainingService = component.get(RemoteMachineTrainingService); } - // tslint:disable-next-line:no-any protected handleTrialMetrics(jobId: string, metrics: any[]): void { // Split metrics array into single metric, then emit // Warning: If not split metrics into single ones, the behavior will be UNKNOWNls diff --git a/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts b/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts index a5cdac9f70..57e6686bd8 100644 --- a/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts +++ b/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts @@ -206,7 +206,6 @@ class RemoteMachineTrainingService implements TrainingService { * Submit trial job * @param form trial job description form */ - // tslint:disable-next-line:informative-docs public async submitTrialJob(form: TrialJobApplicationForm): Promise { if (this.trialConfig === undefined) { throw new Error('trial config is not initialized'); @@ -255,7 +254,6 @@ class RemoteMachineTrainingService implements TrainingService { * Cancel trial job * @param trialJobId ID of trial job */ - // tslint:disable:informative-docs no-unsafe-any public async cancelTrialJob(trialJobId: string, isEarlyStopped: boolean = false): Promise { const deferred: Deferred = new Deferred(); const trialJob: RemoteMachineTrialJobDetail | undefined = this.trialJobsMap.get(trialJobId); @@ -319,7 +317,6 @@ class RemoteMachineTrainingService implements TrainingService { throw new Error('trial config parsed failed'); } // codeDir is not a valid directory, throw Error - // tslint:disable-next-line:non-literal-fs-path if (!fs.lstatSync(remoteMachineTrailConfig.codeDir) .isDirectory()) { throw new Error(`codeDir ${remoteMachineTrailConfig.codeDir} is not a directory`); @@ -438,7 +435,6 @@ class RemoteMachineTrainingService implements TrainingService { await SSHClientUtility.remoteExeCommand(`chmod 
777 ${nniRootDir} ${nniRootDir}/* ${nniRootDir}/scripts/*`, conn); //Begin to execute gpu_metrics_collection scripts - // tslint:disable-next-line: no-floating-promises const script = getGpuMetricsCollectorBashScriptContent(remoteGpuScriptCollectorDir); SSHClientUtility.remoteExeCommand(`bash -c '${script}'`, conn); @@ -549,7 +545,6 @@ class RemoteMachineTrainingService implements TrainingService { command = `CUDA_VISIBLE_DEVICES=" " ${this.trialConfig.command}`; } } - // tslint:disable-next-line: strict-boolean-expressions const nniManagerIp: string = this.nniManagerIpConfig ? this.nniManagerIpConfig.nniManagerIp : getIPV4Address(); if (this.remoteRestServerPort === undefined) { const restServer: RemoteMachineJobRestServer = component.get(RemoteMachineJobRestServer); @@ -587,7 +582,6 @@ class RemoteMachineTrainingService implements TrainingService { // Copy files in codeDir to remote working directory await SSHClientUtility.copyDirectoryToRemote(trialLocalTempFolder, trialWorkingFolder, sshClient, this.remoteOS); // Execute command in remote machine - // tslint:disable-next-line: no-floating-promises SSHClientUtility.remoteExeCommand(`bash ${unixPathJoin(trialWorkingFolder, 'run.sh')}`, sshClient); } @@ -604,6 +598,7 @@ class RemoteMachineTrainingService implements TrainingService { const deferred: Deferred = new Deferred(); const jobpidPath: string = this.getJobPidPath(trialJob.id); const trialReturnCodeFilePath: string = unixPathJoin(this.remoteExpRootDir, 'trials', trialJob.id, '.nni', 'code'); + /* eslint-disable require-atomic-updates */ try { const killResult: number = (await SSHClientUtility.remoteExeCommand(`kill -0 \`cat ${jobpidPath}\``, sshClient)).exitCode; // if the process of jobpid is not alive any more @@ -640,7 +635,7 @@ class RemoteMachineTrainingService implements TrainingService { deferred.resolve(trialJob); } } - + /* eslint-enable require-atomic-updates */ return deferred.promise; } diff --git 
a/src/nni_manager/training_service/remote_machine/sshClientUtility.ts b/src/nni_manager/training_service/remote_machine/sshClientUtility.ts index 253cea1371..79af637dfb 100644 --- a/src/nni_manager/training_service/remote_machine/sshClientUtility.ts +++ b/src/nni_manager/training_service/remote_machine/sshClientUtility.ts @@ -58,7 +58,6 @@ export namespace SSHClientUtility { * @param command the command to execute remotely * @param client SSH Client */ - // tslint:disable:no-unsafe-any no-any export function remoteExeCommand(command: string, client: Client): Promise { const log: Logger = getLogger(); log.debug(`remoteExeCommand: command: [${command}]`); @@ -156,5 +155,4 @@ export namespace SSHClientUtility { return deferred.promise; } - // tslint:enable:no-unsafe-any no-any } diff --git a/src/nni_manager/yarn.lock b/src/nni_manager/yarn.lock index c61938bb3c..bde73d5c07 100644 --- a/src/nni_manager/yarn.lock +++ b/src/nni_manager/yarn.lock @@ -703,7 +703,7 @@ buffer-stream-reader@^0.1.1: version "0.1.1" resolved "https://registry.yarnpkg.com/buffer-stream-reader/-/buffer-stream-reader-0.1.1.tgz#ca8bf93631deedd8b8f8c3bb44991cc30951e259" -builtin-modules@^1.0.0, builtin-modules@^1.1.1: +builtin-modules@^1.0.0: version "1.1.1" resolved "https://registry.yarnpkg.com/builtin-modules/-/builtin-modules-1.1.1.tgz#270f076c5a72c02f5b65a47df94c5fe3a278892f" @@ -841,7 +841,7 @@ chalk@^1.0.0: strip-ansi "^3.0.0" supports-color "^2.0.0" -chalk@^2.0.0, chalk@^2.3.0: +chalk@^2.0.0: version "2.4.1" resolved "https://registry.yarnpkg.com/chalk/-/chalk-2.4.1.tgz#18c49ab16a037b6eb0152cc83e3471338215b66e" dependencies: @@ -971,10 +971,6 @@ commander@2.15.1: version "2.15.1" resolved "https://registry.yarnpkg.com/commander/-/commander-2.15.1.tgz#df46e867d0fc2aec66a34662b406a9ccafff5b0f" -commander@^2.12.1: - version "2.16.0" - resolved "https://registry.yarnpkg.com/commander/-/commander-2.16.0.tgz#f16390593996ceb4f3eeb020b31d78528f7f8a50" - commander@~2.17.1: version "2.17.1" resolved 
"https://registry.yarnpkg.com/commander/-/commander-2.17.1.tgz#bd77ab7de6de94205ceacc72f1716d29f20a77bf" @@ -1134,7 +1130,7 @@ debug@^4.0.1, debug@^4.1.0, debug@^4.1.1: dependencies: ms "^2.1.1" -debuglog@*, debuglog@^1.0.1: +debuglog@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/debuglog/-/debuglog-1.0.1.tgz#aa24ffb9ac3df9a2351837cfb2d279360cd78492" @@ -1217,7 +1213,7 @@ dezalgo@^1.0.0, dezalgo@~1.0.3: asap "^2.0.0" wrappy "1" -diff@3.5.0, diff@^3.1.0, diff@^3.2.0: +diff@3.5.0, diff@^3.1.0: version "3.5.0" resolved "https://registry.yarnpkg.com/diff/-/diff-3.5.0.tgz#800c0dd1e0a8bfbc95835c202ad220fe317e5a12" @@ -2080,7 +2076,7 @@ import-lazy@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/import-lazy/-/import-lazy-2.1.0.tgz#05698e3d45c88e8d7e9d92cb0584e77f096f3e43" -imurmurhash@*, imurmurhash@^0.1.4: +imurmurhash@^0.1.4: version "0.1.4" resolved "https://registry.yarnpkg.com/imurmurhash/-/imurmurhash-0.1.4.tgz#9218b9b2b928a238b13dc4fb6b6d576f231453ea" @@ -2519,10 +2515,6 @@ lockfile@~1.0.3: dependencies: signal-exit "^3.0.2" -lodash._baseindexof@*: - version "3.1.0" - resolved "https://registry.yarnpkg.com/lodash._baseindexof/-/lodash._baseindexof-3.1.0.tgz#fe52b53a1c6761e42618d654e4a25789ed61822c" - lodash._baseuniq@~4.6.0: version "4.6.0" resolved "https://registry.yarnpkg.com/lodash._baseuniq/-/lodash._baseuniq-4.6.0.tgz#0ebb44e456814af7905c6212fa2c9b2d51b841e8" @@ -2530,28 +2522,10 @@ lodash._baseuniq@~4.6.0: lodash._createset "~4.0.0" lodash._root "~3.0.0" -lodash._bindcallback@*: - version "3.0.1" - resolved "https://registry.yarnpkg.com/lodash._bindcallback/-/lodash._bindcallback-3.0.1.tgz#e531c27644cf8b57a99e17ed95b35c748789392e" - -lodash._cacheindexof@*: - version "3.0.2" - resolved "https://registry.yarnpkg.com/lodash._cacheindexof/-/lodash._cacheindexof-3.0.2.tgz#3dc69ac82498d2ee5e3ce56091bafd2adc7bde92" - -lodash._createcache@*: - version "3.1.2" - resolved 
"https://registry.yarnpkg.com/lodash._createcache/-/lodash._createcache-3.1.2.tgz#56d6a064017625e79ebca6b8018e17440bdcf093" - dependencies: - lodash._getnative "^3.0.0" - lodash._createset@~4.0.0: version "4.0.3" resolved "https://registry.yarnpkg.com/lodash._createset/-/lodash._createset-4.0.3.tgz#0f4659fbb09d75194fa9e2b88a6644d363c9fe26" -lodash._getnative@*, lodash._getnative@^3.0.0: - version "3.9.1" - resolved "https://registry.yarnpkg.com/lodash._getnative/-/lodash._getnative-3.9.1.tgz#570bc7dede46d61cdcde687d65d3eecbaa3aaff5" - lodash._root@~3.0.0: version "3.0.1" resolved "https://registry.yarnpkg.com/lodash._root/-/lodash._root-3.0.1.tgz#fba1c4524c19ee9a5f8136b4609f017cf4ded692" @@ -2600,10 +2574,6 @@ lodash.pick@^4.4.0: version "4.4.0" resolved "https://registry.yarnpkg.com/lodash.pick/-/lodash.pick-4.4.0.tgz#52f05610fff9ded422611441ed1fc123a03001b3" -lodash.restparam@*: - version "3.6.1" - resolved "https://registry.yarnpkg.com/lodash.restparam/-/lodash.restparam-3.6.1.tgz#936a4e309ef330a7645ed4145986c85ae5b20805" - lodash.unescape@4.0.1: version "4.0.1" resolved "https://registry.yarnpkg.com/lodash.unescape/-/lodash.unescape-4.0.1.tgz#bf2249886ce514cda112fae9218cdc065211fc9c" @@ -3519,10 +3489,6 @@ path-key@^2.0.0, path-key@^2.0.1: version "2.0.1" resolved "https://registry.yarnpkg.com/path-key/-/path-key-2.0.1.tgz#411cadb574c5a140d3a4b1910d40d80cc9f40b40" -path-parse@^1.0.5: - version "1.0.5" - resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.5.tgz#3c1adf871ea9cd6c9431b6ea2bd74a0ff055c4c1" - path-parse@^1.0.6: version "1.0.6" resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.6.tgz#d62dbb5679405d72c4737ec58600e9ddcf06d24c" @@ -3834,7 +3800,7 @@ readable-stream@~2.0.0: string_decoder "~0.10.x" util-deprecate "~1.0.1" -readdir-scoped-modules@*, readdir-scoped-modules@^1.0.0: +readdir-scoped-modules@^1.0.0: version "1.1.0" resolved 
"https://registry.yarnpkg.com/readdir-scoped-modules/-/readdir-scoped-modules-1.1.0.tgz#8d45407b4f870a0dcaebc0e28670d18e74514309" dependencies: @@ -3977,12 +3943,6 @@ resolve@^1.10.0: dependencies: path-parse "^1.0.6" -resolve@^1.3.2: - version "1.8.1" - resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.8.1.tgz#82f1ec19a423ac1fbd080b0bab06ba36e84a7a26" - dependencies: - path-parse "^1.0.5" - responselike@1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/responselike/-/responselike-1.0.2.tgz#918720ef3b631c5642be068f15ade5a46f4ba1e7" @@ -4599,7 +4559,7 @@ ts-node@^7.0.0: source-map-support "^0.5.6" yn "^2.0.0" -tslib@^1.8.0, tslib@^1.8.1: +tslib@^1.8.1: version "1.9.3" resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.9.3.tgz#d7e4dd79245d85428c4d7e4822a79917954ca286" @@ -4607,42 +4567,6 @@ tslib@^1.9.0: version "1.10.0" resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.10.0.tgz#c3c19f95973fb0a62973fb09d90d961ee43e5c8a" -tslint-microsoft-contrib@^6.0.0: - version "6.2.0" - resolved "https://registry.yarnpkg.com/tslint-microsoft-contrib/-/tslint-microsoft-contrib-6.2.0.tgz#8aa0f40584d066d05e6a5e7988da5163b85f2ad4" - dependencies: - tsutils "^2.27.2 <2.29.0" - -tslint@^5.12.0: - version "5.18.0" - resolved "https://registry.yarnpkg.com/tslint/-/tslint-5.18.0.tgz#f61a6ddcf372344ac5e41708095bbf043a147ac6" - dependencies: - "@babel/code-frame" "^7.0.0" - builtin-modules "^1.1.1" - chalk "^2.3.0" - commander "^2.12.1" - diff "^3.2.0" - glob "^7.1.1" - js-yaml "^3.13.1" - minimatch "^3.0.4" - mkdirp "^0.5.1" - resolve "^1.3.2" - semver "^5.3.0" - tslib "^1.8.0" - tsutils "^2.29.0" - -"tsutils@^2.27.2 <2.29.0": - version "2.28.0" - resolved "https://registry.yarnpkg.com/tsutils/-/tsutils-2.28.0.tgz#6bd71e160828f9d019b6f4e844742228f85169a1" - dependencies: - tslib "^1.8.1" - -tsutils@^2.29.0: - version "2.29.0" - resolved "https://registry.yarnpkg.com/tsutils/-/tsutils-2.29.0.tgz#32b488501467acbedd4b85498673a0812aca0b99" - dependencies: - tslib 
"^1.8.1" - tsutils@^3.17.1: version "3.17.1" resolved "https://registry.yarnpkg.com/tsutils/-/tsutils-3.17.1.tgz#ed719917f11ca0dee586272b2ac49e015a2dd759" @@ -4818,7 +4742,7 @@ v8-compile-cache@^2.0.3: version "2.1.0" resolved "https://registry.yarnpkg.com/v8-compile-cache/-/v8-compile-cache-2.1.0.tgz#e14de37b31a6d194f5690d67efc4e7f6fc6ab30e" -validate-npm-package-license@*, validate-npm-package-license@^3.0.1: +validate-npm-package-license@^3.0.1: version "3.0.4" resolved "https://registry.yarnpkg.com/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz#fc91f6b9c7ba15c857f4cb2c5defeec39d4f410a" dependencies: From 9484efb521717c9cb5639b1e132f6f196401da39 Mon Sep 17 00:00:00 2001 From: xuehui Date: Thu, 12 Dec 2019 10:19:44 +0800 Subject: [PATCH 2/4] Update the memroy usage and time cost in benchmark (#1829) * change auto-feature-engineering dir * add dir * update the ReadME * add test time and memory * update docs | update benchmark * add gitignore * update benchmark in feature selector(memory and time) * merge master * ignore F821 in flake8 | update benchmark number * update number in benchmark * fix flake8 * remove the update for the azure-pipeline.yml * update by comments --- README.md | 2 +- docs/en_US/FeatureEngineering/Overview.md | 15 ++-- .../auto-feature-engineering/README.md | 0 .../auto-feature-engineering/README_zh_CN.md | 0 .../gradient_feature_selector/.gitignore | 5 ++ .../benchmark_test.py | 90 +++++++++++++------ .../gradient_feature_selector/sklearn_test.py | 34 +++---- .../gradient_feature_selector/test_memory.py | 26 ++++++ .../gradient_feature_selector/test_time.py | 26 ++++++ 9 files changed, 147 insertions(+), 51 deletions(-) rename examples/{trials => feature_engineering}/auto-feature-engineering/README.md (100%) rename examples/{trials => feature_engineering}/auto-feature-engineering/README_zh_CN.md (100%) create mode 100644 examples/feature_engineering/gradient_feature_selector/.gitignore create mode 100644 
examples/feature_engineering/gradient_feature_selector/test_memory.py create mode 100644 examples/feature_engineering/gradient_feature_selector/test_time.py diff --git a/README.md b/README.md index fb7bee7060..425f116c59 100644 --- a/README.md +++ b/README.md @@ -358,7 +358,7 @@ With authors' permission, we listed a set of NNI usage examples and relevant art * ### **External Repositories** ### * Run [ENAS](examples/tuners/enas_nni/README.md) with NNI * Run [Neural Network Architecture Search](examples/trials/nas_cifar10/README.md) with NNI - * [Automatic Feature Engineering](examples/trials/auto-feature-engineering/README.md) with NNI + * [Automatic Feature Engineering](examples/feature_engineering/auto-feature-engineering/README.md) with NNI * [Hyperparameter Tuning for Matrix Factorization](https://github.com/microsoft/recommenders/blob/master/notebooks/04_model_select_and_optimize/nni_surprise_svd.ipynb) with NNI * [scikit-nni](https://github.com/ksachdeva/scikit-nni) Hyper-parameter search for scikit-learn pipelines using NNI diff --git a/docs/en_US/FeatureEngineering/Overview.md b/docs/en_US/FeatureEngineering/Overview.md index 2ad183658f..30b0a51956 100644 --- a/docs/en_US/FeatureEngineering/Overview.md +++ b/docs/en_US/FeatureEngineering/Overview.md @@ -243,13 +243,14 @@ print("Pipeline Score: ", pipeline.score(X_train, y_train)) `Baseline` means without any feature selection, we directly pass the data to LogisticRegression. For this benchmark, we only use 10% data from the train as test data. For the GradientFeatureSelector, we only take the top20 features. The metric is the mean accuracy on the given test data and labels. 
-| Dataset | Baseline | GradientFeatureSelector top20 | GradientFeatureSelector auto | TreeBasedClassifier | #Train | #Feature | -| ----------- | ------ | ------ | ------- | ------- | -------- |-------- | -| colon-cancer | 0.7547 | 0.7368 | 0.5389 | 0.7223 | 62 | 2,000 | -| gisette | 0.9725 | 0.9241 | 0.9658 |0.9792 | 6,000 | 5,000 | -| rcv1 | 0.9644 | 0.7333 | 0.9548 |0.9615 | 20,242 | 47,236 | -| news20.binary | 0.9208 | 0.8780 | 0.8875 | 0.9070 | 19,996 | 1,355,191 | -| real-sim | 0.9681 | 0.7969 | 0.9439 |0.9591 | 72,309 | 20,958 | +| Dataset | All Features + LR (acc, time, memory) | GradientFeatureSelector + LR (acc, time, memory) | TreeBasedClassifier + LR (acc, time, memory) | #Train | #Feature | +| ----------- | ------ | ------ | ------- | ------- | -------- | +| colon-cancer | 0.7547, 890ms, 348MiB | 0.7368, 363ms, 286MiB | 0.7223, 171ms, 1171 MiB | 62 | 2,000 | +| gisette | 0.9725, 215ms, 584MiB | 0.89416, 446ms, 397MiB | 0.9792, 911ms, 234MiB | 6,000 | 5,000 | +| avazu | 0.8834, N/A, N/A | N/A, N/A, N/A | N/A, N/A, N/A | 40,428,967 | 1,000,000 | +| rcv1 | 0.9644, 557ms, 241MiB | 0.7333, 401ms, 281MiB | 0.9615, 752ms, 284MiB | 20,242 | 47,236 | +| news20.binary | 0.9208, 707ms, 361MiB | 0.6870, 565ms, 371MiB | 0.9070, 904ms, 364MiB | 19,996 | 1,355,191 | +| real-sim | 0.9681, 433ms, 274MiB | 0.7969, 251ms, 274MiB | 0.9591, 643ms, 367MiB | 72,309 | 20,958 | The dataset of benchmark could be download in [here](https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/ ) diff --git a/examples/trials/auto-feature-engineering/README.md b/examples/feature_engineering/auto-feature-engineering/README.md similarity index 100% rename from examples/trials/auto-feature-engineering/README.md rename to examples/feature_engineering/auto-feature-engineering/README.md diff --git a/examples/trials/auto-feature-engineering/README_zh_CN.md b/examples/feature_engineering/auto-feature-engineering/README_zh_CN.md similarity index 100% rename from 
examples/trials/auto-feature-engineering/README_zh_CN.md rename to examples/feature_engineering/auto-feature-engineering/README_zh_CN.md diff --git a/examples/feature_engineering/gradient_feature_selector/.gitignore b/examples/feature_engineering/gradient_feature_selector/.gitignore new file mode 100644 index 0000000000..048100f85d --- /dev/null +++ b/examples/feature_engineering/gradient_feature_selector/.gitignore @@ -0,0 +1,5 @@ +*.bz2 +*.svm +*.log +*memory +*time diff --git a/examples/feature_engineering/gradient_feature_selector/benchmark_test.py b/examples/feature_engineering/gradient_feature_selector/benchmark_test.py index 1ba2f53e43..b6ea85268e 100644 --- a/examples/feature_engineering/gradient_feature_selector/benchmark_test.py +++ b/examples/feature_engineering/gradient_feature_selector/benchmark_test.py @@ -18,6 +18,10 @@ import bz2 import urllib.request import numpy as np +import datetime + +import line_profiler +profile = line_profiler.LineProfiler() import os @@ -34,7 +38,7 @@ class Benchmark(): - def __init__(self, files, test_size = 0.2): + def __init__(self, files=None, test_size=0.2): self.files = files self.test_size = test_size @@ -73,40 +77,72 @@ def download(self, name, path): return update_name +@profile +def test_memory(pipeline_name, name, path): + if pipeline_name == "LR": + pipeline = make_pipeline(LogisticRegression()) + + if pipeline_name == "FGS": + pipeline = make_pipeline(FeatureGradientSelector(), LogisticRegression()) + + if pipeline_name == "Tree": + pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression()) + + test_benchmark = Benchmark() + print("Dataset:\t", name) + print("Pipeline:\t", pipeline_name) + test_benchmark.run_test(pipeline, name, path) + print("") + + +def test_time(pipeline_name, name, path): + if pipeline_name == "LR": + pipeline = make_pipeline(LogisticRegression()) + + if pipeline_name == "FGS": + pipeline = make_pipeline(FeatureGradientSelector(), 
LogisticRegression()) + + if pipeline_name == "Tree": + pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression()) + + test_benchmark = Benchmark() + print("Dataset:\t", name) + print("Pipeline:\t", pipeline_name) + starttime = datetime.datetime.now() + test_benchmark.run_test(pipeline, name, path) + endtime = datetime.datetime.now() + print("Used time: ", (endtime - starttime).microseconds/1000) + print("") + if __name__ == "__main__": LIBSVM_DATA = { "rcv1" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_train.binary.bz2", - # "avazu" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/avazu-app.bz2", "colon-cancer" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/covtype.libsvm.binary.bz2", "gisette" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/gisette_scale.bz2", - # "kdd2010" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/kdda.bz2", - # "kdd2012" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/kdd12.bz2", "news20.binary" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/news20.binary.bz2", "real-sim" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/real-sim.bz2", - "webspam" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/webspam_wc_normalized_trigram.svm.bz2" + "webspam" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/webspam_wc_normalized_trigram.svm.bz2", + "avazu" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/avazu-app.bz2" } - test_benchmark = Benchmark(LIBSVM_DATA) - - pipeline1 = make_pipeline(LogisticRegression()) - print("Test all data in LogisticRegression.") - print() - test_benchmark.run_all_test(pipeline1) - - pipeline2 = make_pipeline(FeatureGradientSelector(), LogisticRegression()) - print("Test data selected by FeatureGradientSelector in LogisticRegression.") - print() - 
test_benchmark.run_all_test(pipeline2) - - pipeline3 = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression()) - print("Test data selected by TreeClssifier in LogisticRegression.") - print() - test_benchmark.run_all_test(pipeline3) - - pipeline4 = make_pipeline(FeatureGradientSelector(n_features=20), LogisticRegression()) - print("Test data selected by FeatureGradientSelector top 20 in LogisticRegression.") - print() - test_benchmark.run_all_test(pipeline4) + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--pipeline_name', type=str, help='display pipeline_name.') + parser.add_argument('--name', type=str, help='display name.') + parser.add_argument('--object', type=str, help='display test object: time or memory.') + + args = parser.parse_args() + pipeline_name = args.pipeline_name + name = args.name + test_object = args.object + path = LIBSVM_DATA[name] + + if test_object == 'time': + test_time(pipeline_name, name, path) + elif test_object == 'memory': + test_memory(pipeline_name, name, path) + else: + print("Not support test object.\t", test_object) - print("Done.") \ No newline at end of file + print("Done.") diff --git a/examples/feature_engineering/gradient_feature_selector/sklearn_test.py b/examples/feature_engineering/gradient_feature_selector/sklearn_test.py index 4988bedb59..c04075ea54 100644 --- a/examples/feature_engineering/gradient_feature_selector/sklearn_test.py +++ b/examples/feature_engineering/gradient_feature_selector/sklearn_test.py @@ -30,26 +30,28 @@ from nni.feature_engineering.gradient_selector import FeatureGradientSelector -url_zip_train = 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_train.binary.bz2' -urllib.request.urlretrieve(url_zip_train, filename='train.bz2') -f_svm = open('train.svm', 'wt') -with bz2.open('train.bz2', 'rb') as f_zip: - data = f_zip.read() - f_svm.write(data.decode('utf-8')) -f_svm.close() +def test(): + url_zip_train = 
'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_train.binary.bz2' + urllib.request.urlretrieve(url_zip_train, filename='train.bz2') + f_svm = open('train.svm', 'wt') + with bz2.open('train.bz2', 'rb') as f_zip: + data = f_zip.read() + f_svm.write(data.decode('utf-8')) + f_svm.close() -X, y = load_svmlight_file('train.svm') -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42) -fgs = FeatureGradientSelector(n_features=10) -fgs.fit(X_train, y_train) + X, y = load_svmlight_file('train.svm') + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42) -print("selected features\t", fgs.get_selected_features()) -pipeline = make_pipeline(FeatureGradientSelector(n_epochs=1, n_features=10), LogisticRegression()) -# pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression()) -pipeline.fit(X_train, y_train) + pipeline = make_pipeline(FeatureGradientSelector(n_epochs=1, n_features=10), LogisticRegression()) + # pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression()) -print("Pipeline Score: ", pipeline.score(X_train, y_train)) \ No newline at end of file + pipeline.fit(X_train, y_train) + + print("Pipeline Score: ", pipeline.score(X_train, y_train)) + +if __name__ == "__main__": + test() \ No newline at end of file diff --git a/examples/feature_engineering/gradient_feature_selector/test_memory.py b/examples/feature_engineering/gradient_feature_selector/test_memory.py new file mode 100644 index 0000000000..862b656bbe --- /dev/null +++ b/examples/feature_engineering/gradient_feature_selector/test_memory.py @@ -0,0 +1,26 @@ +import os + +LIBSVM_DATA = { + "rcv1" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_train.binary.bz2", + "colon-cancer" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/covtype.libsvm.binary.bz2", + "gisette" : 
"https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/gisette_scale.bz2", + "news20.binary" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/news20.binary.bz2", + "real-sim" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/real-sim.bz2", + "avazu" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/avazu-app.bz2", +} + +pipeline_name = "Tree" +device = "CUDA_VISIBLE_DEVICES=0 " +script = "setsid python -m memory_profiler benchmark_test.py " +test_object = "memory" + +for name in LIBSVM_DATA: + log_name = "_".join([pipeline_name, name, test_object]) + command = device + script + "--pipeline_name " + pipeline_name + " --name " + name + " --object " + test_object + " >" +log_name + " 2>&1 &" + print("command is\t", command) + os.system(command) + print("log is here\t", log_name) + +print("Done.") + + diff --git a/examples/feature_engineering/gradient_feature_selector/test_time.py b/examples/feature_engineering/gradient_feature_selector/test_time.py new file mode 100644 index 0000000000..3c049c4afc --- /dev/null +++ b/examples/feature_engineering/gradient_feature_selector/test_time.py @@ -0,0 +1,26 @@ +import os + +LIBSVM_DATA = { + "rcv1" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_train.binary.bz2", + "colon-cancer" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/covtype.libsvm.binary.bz2", + "gisette" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/gisette_scale.bz2", + "news20.binary" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/news20.binary.bz2", + "real-sim" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/real-sim.bz2", + "avazu" : "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/avazu-app.bz2", +} + +pipeline_name = "LR" +device = "CUDA_VISIBLE_DEVICES=0 " +script = "setsid python benchmark_test.py " +test_object = "time" + +for name in LIBSVM_DATA: + log_name = "_".join([pipeline_name, 
name, test_object]) + command = device + script + "--pipeline_name " + pipeline_name + " --name " + name + " --object " + test_object + " >" +log_name + " 2>&1 &" + print("command is\t", command) + os.system(command) + print("log is here\t", log_name) + +print("Done.") + + From ac6f420f224c331af8bda16edd9700e4cadea7e0 Mon Sep 17 00:00:00 2001 From: Tang Lang Date: Thu, 12 Dec 2019 10:27:21 +0800 Subject: [PATCH 3/4] Pruners refactor (#1820) --- ...ner_torch_vgg16.py => L1_torch_cifar10.py} | 58 +-- examples/model_compress/models/cifar10/vgg.py | 63 ++++ examples/model_compress/pruning_kd.py | 60 +-- ...r_torch_vgg19.py => slim_torch_cifar10.py} | 52 +-- .../nni/compression/torch/builtin_pruners.py | 354 +++++++++++------- src/sdk/pynni/tests/test_compressor.py | 19 +- 6 files changed, 295 insertions(+), 311 deletions(-) rename examples/model_compress/{L1_filter_pruner_torch_vgg16.py => L1_torch_cifar10.py} (70%) create mode 100644 examples/model_compress/models/cifar10/vgg.py rename examples/model_compress/{slim_pruner_torch_vgg19.py => slim_torch_cifar10.py} (73%) diff --git a/examples/model_compress/L1_filter_pruner_torch_vgg16.py b/examples/model_compress/L1_torch_cifar10.py similarity index 70% rename from examples/model_compress/L1_filter_pruner_torch_vgg16.py rename to examples/model_compress/L1_torch_cifar10.py index c54fc12119..40ad2bb023 100644 --- a/examples/model_compress/L1_filter_pruner_torch_vgg16.py +++ b/examples/model_compress/L1_torch_cifar10.py @@ -4,59 +4,7 @@ import torch.nn.functional as F from torchvision import datasets, transforms from nni.compression.torch import L1FilterPruner - - -class vgg(nn.Module): - def __init__(self, init_weights=True): - super(vgg, self).__init__() - cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512] - self.cfg = cfg - self.feature = self.make_layers(cfg, True) - num_classes = 10 - self.classifier = nn.Sequential( - nn.Linear(cfg[-1], 512), - nn.BatchNorm1d(512), - 
nn.ReLU(inplace=True), - nn.Linear(512, num_classes) - ) - if init_weights: - self._initialize_weights() - - def make_layers(self, cfg, batch_norm=True): - layers = [] - in_channels = 3 - for v in cfg: - if v == 'M': - layers += [nn.MaxPool2d(kernel_size=2, stride=2)] - else: - conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1, bias=False) - if batch_norm: - layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] - else: - layers += [conv2d, nn.ReLU(inplace=True)] - in_channels = v - return nn.Sequential(*layers) - - def forward(self, x): - x = self.feature(x) - x = nn.AvgPool2d(2)(x) - x = x.view(x.size(0), -1) - y = self.classifier(x) - return y - - def _initialize_weights(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. / n)) - if m.bias is not None: - m.bias.data.zero_() - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(0.5) - m.bias.data.zero_() - elif isinstance(m, nn.Linear): - m.weight.data.normal_(0, 0.01) - m.bias.data.zero_() +from models.cifar10.vgg import VGG def train(model, device, train_loader, optimizer): @@ -111,7 +59,7 @@ def main(): ])), batch_size=200, shuffle=False) - model = vgg() + model = VGG(depth=16) model.to(device) # Train the base VGG-16 model @@ -162,7 +110,7 @@ def main(): # Test the exported model print('=' * 10 + 'Test on the pruned model after fine tune' + '=' * 10) - new_model = vgg() + new_model = VGG(depth=16) new_model.to(device) new_model.load_state_dict(torch.load('pruned_vgg16_cifar10.pth')) test(new_model, device, test_loader) diff --git a/examples/model_compress/models/cifar10/vgg.py b/examples/model_compress/models/cifar10/vgg.py new file mode 100644 index 0000000000..f293770c72 --- /dev/null +++ b/examples/model_compress/models/cifar10/vgg.py @@ -0,0 +1,63 @@ +import math +import torch +import torch.nn as nn +import torch.nn.functional as F + +defaultcfg = { + 11: [64, 'M', 128, 
'M', 256, 256, 'M', 512, 512, 'M', 512, 512], + 13: [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512], + 16: [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512], + 19: [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512], +} + + +class VGG(nn.Module): + def __init__(self, depth=16): + super(VGG, self).__init__() + cfg = defaultcfg[depth] + self.cfg = cfg + self.feature = self.make_layers(cfg, True) + num_classes = 10 + self.classifier = nn.Sequential( + nn.Linear(cfg[-1], 512), + nn.BatchNorm1d(512), + nn.ReLU(inplace=True), + nn.Linear(512, num_classes) + ) + self._initialize_weights() + + def make_layers(self, cfg, batch_norm=False): + layers = [] + in_channels = 3 + for v in cfg: + if v == 'M': + layers += [nn.MaxPool2d(kernel_size=2, stride=2)] + else: + conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1, bias=False) + if batch_norm: + layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] + else: + layers += [conv2d, nn.ReLU(inplace=True)] + in_channels = v + return nn.Sequential(*layers) + + def forward(self, x): + x = self.feature(x) + x = nn.AvgPool2d(2)(x) + x = x.view(x.size(0), -1) + y = self.classifier(x) + return y + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(0.5) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + m.weight.data.normal_(0, 0.01) + m.bias.data.zero_() diff --git a/examples/model_compress/pruning_kd.py b/examples/model_compress/pruning_kd.py index 9e98ccc0d1..dcd680ef82 100644 --- a/examples/model_compress/pruning_kd.py +++ b/examples/model_compress/pruning_kd.py @@ -5,59 +5,7 @@ from torchvision import datasets, transforms from nni.compression.torch import L1FilterPruner from knowledge_distill.knowledge_distill import KnowledgeDistill - - -class vgg(nn.Module): - def __init__(self, init_weights=True): - super(vgg, self).__init__() - cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512] - self.cfg = cfg - self.feature = self.make_layers(cfg, True) - num_classes = 10 - self.classifier = nn.Sequential( - nn.Linear(cfg[-1], 512), - nn.BatchNorm1d(512), - nn.ReLU(inplace=True), - nn.Linear(512, num_classes) - ) - if init_weights: - self._initialize_weights() - - def make_layers(self, cfg, batch_norm=True): - layers = [] - in_channels = 3 - for v in cfg: - if v == 'M': - layers += [nn.MaxPool2d(kernel_size=2, stride=2)] - else: - conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1, bias=False) - if batch_norm: - layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] - else: - layers += [conv2d, nn.ReLU(inplace=True)] - in_channels = v - return nn.Sequential(*layers) - - def forward(self, x): - x = self.feature(x) - x = nn.AvgPool2d(2)(x) - x = x.view(x.size(0), -1) - y = self.classifier(x) - return y - - def _initialize_weights(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. 
/ n)) - if m.bias is not None: - m.bias.data.zero_() - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(0.5) - m.bias.data.zero_() - elif isinstance(m, nn.Linear): - m.weight.data.normal_(0, 0.01) - m.bias.data.zero_() +from models.cifar10.vgg import VGG def train(model, device, train_loader, optimizer, kd=None): @@ -119,7 +67,7 @@ def main(): ])), batch_size=200, shuffle=False) - model = vgg() + model = VGG(depth=16) model.to(device) # Train the base VGG-16 model @@ -156,7 +104,7 @@ def main(): print('=' * 10 + 'Fine tuning' + '=' * 10) optimizer_finetune = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4) best_top1 = 0 - kd_teacher_model = vgg() + kd_teacher_model = VGG(depth=16) kd_teacher_model.to(device) kd_teacher_model.load_state_dict(torch.load('vgg16_cifar10.pth')) kd = KnowledgeDistill(kd_teacher_model, kd_T=5) @@ -173,7 +121,7 @@ def main(): # Test the exported model print('=' * 10 + 'Test on the pruned model after fine tune' + '=' * 10) - new_model = vgg() + new_model = VGG(depth=16) new_model.to(device) new_model.load_state_dict(torch.load('pruned_vgg16_cifar10.pth')) test(new_model, device, test_loader) diff --git a/examples/model_compress/slim_pruner_torch_vgg19.py b/examples/model_compress/slim_torch_cifar10.py similarity index 73% rename from examples/model_compress/slim_pruner_torch_vgg19.py rename to examples/model_compress/slim_torch_cifar10.py index b84c8201ae..ebd36f44d4 100644 --- a/examples/model_compress/slim_pruner_torch_vgg19.py +++ b/examples/model_compress/slim_torch_cifar10.py @@ -4,53 +4,7 @@ import torch.nn.functional as F from torchvision import datasets, transforms from nni.compression.torch import SlimPruner - - -class vgg(nn.Module): - def __init__(self, init_weights=True): - super(vgg, self).__init__() - cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512] - self.feature = self.make_layers(cfg, True) - num_classes = 10 - self.classifier = 
nn.Linear(cfg[-1], num_classes) - if init_weights: - self._initialize_weights() - - def make_layers(self, cfg, batch_norm=False): - layers = [] - in_channels = 3 - for v in cfg: - if v == 'M': - layers += [nn.MaxPool2d(kernel_size=2, stride=2)] - else: - conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1, bias=False) - if batch_norm: - layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] - else: - layers += [conv2d, nn.ReLU(inplace=True)] - in_channels = v - return nn.Sequential(*layers) - - def forward(self, x): - x = self.feature(x) - x = nn.AvgPool2d(2)(x) - x = x.view(x.size(0), -1) - y = self.classifier(x) - return y - - def _initialize_weights(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. / n)) - if m.bias is not None: - m.bias.data.zero_() - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(0.5) - m.bias.data.zero_() - elif isinstance(m, nn.Linear): - m.weight.data.normal_(0, 0.01) - m.bias.data.zero_() +from models.cifar10.vgg import VGG def updateBN(model): @@ -114,7 +68,7 @@ def main(): ])), batch_size=200, shuffle=False) - model = vgg() + model = VGG(depth=19) model.to(device) # Train the base VGG-19 model @@ -165,7 +119,7 @@ def main(): # Test the exported model print('=' * 10 + 'Test the export pruned model after fine tune' + '=' * 10) - new_model = vgg() + new_model = VGG(depth=19) new_model.to(device) new_model.load_state_dict(torch.load('pruned_vgg19_cifar10.pth')) test(new_model, device, test_loader) diff --git a/src/sdk/pynni/nni/compression/torch/builtin_pruners.py b/src/sdk/pynni/nni/compression/torch/builtin_pruners.py index dd23737c99..b31a8dd77f 100644 --- a/src/sdk/pynni/nni/compression/torch/builtin_pruners.py +++ b/src/sdk/pynni/nni/compression/torch/builtin_pruners.py @@ -5,7 +5,7 @@ import torch from .compressor import Pruner -__all__ = ['LevelPruner', 'AGP_Pruner', 'FPGMPruner', 'L1FilterPruner', 
'SlimPruner'] +__all__ = ['LevelPruner', 'AGP_Pruner', 'SlimPruner', 'L1FilterPruner', 'L2FilterPruner', 'FPGMPruner'] logger = logging.getLogger('torch pruner') @@ -166,119 +166,132 @@ def update_epoch(self, epoch): self.if_init_list[k] = True -class FPGMPruner(Pruner): +class SlimPruner(Pruner): """ - A filter pruner via geometric median. - "Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration", - https://arxiv.org/pdf/1811.00250.pdf + A structured pruning algorithm that prunes channels by pruning the weights of BN layers. + Zhuang Liu, Jianguo Li, Zhiqiang Shen, Gao Huang, Shoumeng Yan and Changshui Zhang + "Learning Efficient Convolutional Networks through Network Slimming", 2017 ICCV + https://arxiv.org/pdf/1708.06519.pdf """ def __init__(self, model, config_list): """ Parameters ---------- - model : pytorch model - the model user wants to compress - config_list: list + config_list : list support key for each list item: - sparsity: percentage of convolutional filters to be pruned. 
""" + super().__init__(model, config_list) - self.mask_dict = {} - self.epoch_pruned_layers = set() + self.mask_calculated_ops = set() + weight_list = [] + if len(config_list) > 1: + logger.warning('Slim pruner only supports 1 configuration') + config = config_list[0] + for (layer, config) in self.detect_modules_to_compress(): + assert layer.type == 'BatchNorm2d', 'SlimPruner only supports 2d batch normalization layer pruning' + weight_list.append(layer.module.weight.data.abs().clone()) + all_bn_weights = torch.cat(weight_list) + k = int(all_bn_weights.shape[0] * config['sparsity']) + self.global_threshold = torch.topk(all_bn_weights.view(-1), k, largest=False)[0].max() def calc_mask(self, layer, config): """ - Supports Conv1d, Conv2d - filter dimensions for Conv1d: - OUT: number of output channel - IN: number of input channel - LEN: filter length - filter dimensions for Conv2d: - OUT: number of output channel - IN: number of input channel - H: filter height - W: filter width + Calculate the mask of given layer. + Scale factors with the smallest absolute value in the BN layer are masked. 
Parameters ---------- layer : LayerInfo - calculate mask for `layer`'s weight + the layer to instrument the compression operation config : dict - the configuration for generating the mask + layer's pruning config + Returns + ------- + torch.Tensor + mask of the layer's weight """ + weight = layer.module.weight.data - assert 0 <= config.get('sparsity') < 1 - assert layer.type in ['Conv1d', 'Conv2d'] - assert layer.type in config['op_types'] + op_name = layer.name + op_type = layer.type + assert op_type == 'BatchNorm2d', 'SlimPruner only supports 2d batch normalization layer pruning' + if op_name in self.mask_calculated_ops: + assert op_name in self.mask_dict + return self.mask_dict.get(op_name) + mask = torch.ones(weight.size()).type_as(weight) + try: + w_abs = weight.abs() + mask = torch.gt(w_abs, self.global_threshold).type_as(weight) + finally: + self.mask_dict.update({layer.name: mask}) + self.mask_calculated_ops.add(layer.name) - if layer.name in self.epoch_pruned_layers: - assert layer.name in self.mask_dict - return self.mask_dict.get(layer.name) + return mask - masks = torch.ones(weight.size()).type_as(weight) - try: - num_filters = weight.size(0) - num_prune = int(num_filters * config.get('sparsity')) - if num_filters < 2 or num_prune < 1: - return masks - min_gm_idx = self._get_min_gm_kernel_idx(weight, num_prune) - for idx in min_gm_idx: - masks[idx] = 0. - finally: - self.mask_dict.update({layer.name: masks}) - self.epoch_pruned_layers.add(layer.name) +class RankFilterPruner(Pruner): + """ + A structured pruning base class that prunes the filters with the smallest + importance criterion in convolution layers to achieve a preset level of network sparsity. + """ - return masks + def __init__(self, model, config_list): + """ + Parameters + ---------- + model : torch.nn.module + Model to be pruned + config_list : list + support key for each list item: + - sparsity: percentage of convolutional filters to be pruned. 
+ """ - def _get_min_gm_kernel_idx(self, weight, n): - assert len(weight.size()) in [3, 4] + super().__init__(model, config_list) + self.mask_calculated_ops = set() - dist_list = [] - for out_i in range(weight.size(0)): - dist_sum = self._get_distance_sum(weight, out_i) - dist_list.append((dist_sum, out_i)) - min_gm_kernels = sorted(dist_list, key=lambda x: x[0])[:n] - return [x[1] for x in min_gm_kernels] + def _get_mask(self, base_mask, weight, num_prune): + return torch.ones(weight.size()).type_as(weight) - def _get_distance_sum(self, weight, out_idx): + def calc_mask(self, layer, config): """ - Calculate the total distance between a specified filter (by out_idex and in_idx) and - all other filters. - Optimized verision of following naive implementation: - def _get_distance_sum(self, weight, in_idx, out_idx): - w = weight.view(-1, weight.size(-2), weight.size(-1)) - dist_sum = 0. - for k in w: - dist_sum += torch.dist(k, weight[in_idx, out_idx], p=2) - return dist_sum + Calculate the mask of given layer. + Filters with the smallest importance criterion of the kernel weights are masked. Parameters ---------- - weight: Tensor - convolutional filter weight - out_idx: int - output channel index of specified filter, this method calculates the total distance - between this specified filter and all other filters. 
+ layer : LayerInfo + the layer to instrument the compression operation + config : dict + layer's pruning config Returns ------- - float32 - The total distance + torch.Tensor + mask of the layer's weight """ - logger.debug('weight size: %s', weight.size()) - assert len(weight.size()) in [3, 4], 'unsupported weight shape' - - w = weight.view(weight.size(0), -1) - anchor_w = w[out_idx].unsqueeze(0).expand(w.size(0), w.size(1)) - x = w - anchor_w - x = (x * x).sum(-1) - x = torch.sqrt(x) - return x.sum() - def update_epoch(self, epoch): - self.epoch_pruned_layers = set() + weight = layer.module.weight.data + op_name = layer.name + op_type = layer.type + assert 0 <= config.get('sparsity') < 1 + assert op_type in ['Conv1d', 'Conv2d'] + assert op_type in config.get('op_types') + if op_name in self.mask_calculated_ops: + assert op_name in self.mask_dict + return self.mask_dict.get(op_name) + mask = torch.ones(weight.size()).type_as(weight) + try: + filters = weight.size(0) + num_prune = int(filters * config.get('sparsity')) + if filters < 2 or num_prune < 1: + return mask + mask = self._get_mask(mask, weight, num_prune) + finally: + self.mask_dict.update({op_name: mask}) + self.mask_calculated_ops.add(op_name) + return mask.detach() -class L1FilterPruner(Pruner): +class L1FilterPruner(RankFilterPruner): """ A structured pruning algorithm that prunes the filters of smallest magnitude weights sum in the convolution layers to achieve a preset level of network sparsity. @@ -299,107 +312,162 @@ def __init__(self, model, config_list): """ super().__init__(model, config_list) - self.mask_calculated_ops = set() - def calc_mask(self, layer, config): + def _get_mask(self, base_mask, weight, num_prune): """ Calculate the mask of given layer. Filters with the smallest sum of its absolute kernel weights are masked. 
Parameters ---------- - layer : LayerInfo - the layer to instrument the compression operation - config : dict - layer's pruning config + base_mask : torch.Tensor + The basic mask with the same shape of weight, all item in the basic mask is 1. + weight : torch.Tensor + Layer's weight + num_prune : int + Num of filters to prune Returns ------- torch.Tensor - mask of the layer's weight + Mask of the layer's weight """ - weight = layer.module.weight.data - op_name = layer.name - op_type = layer.type - assert op_type == 'Conv2d', 'L1FilterPruner only supports 2d convolution layer pruning' - if op_name in self.mask_calculated_ops: - assert op_name in self.mask_dict - return self.mask_dict.get(op_name) - mask = torch.ones(weight.size()).type_as(weight) - try: - filters = weight.shape[0] - w_abs = weight.abs() - k = int(filters * config['sparsity']) - if k == 0: - return torch.ones(weight.shape).type_as(weight) - w_abs_structured = w_abs.view(filters, -1).sum(dim=1) - threshold = torch.topk(w_abs_structured.view(-1), k, largest=False)[0].max() - mask = torch.gt(w_abs_structured, threshold)[:, None, None, None].expand_as(weight).type_as(weight) - finally: - self.mask_dict.update({layer.name: mask}) - self.mask_calculated_ops.add(layer.name) + filters = weight.shape[0] + w_abs = weight.abs() + w_abs_structured = w_abs.view(filters, -1).sum(dim=1) + threshold = torch.topk(w_abs_structured.view(-1), num_prune, largest=False)[0].max() + mask = torch.gt(w_abs_structured, threshold)[:, None, None, None].expand_as(weight).type_as(weight) return mask -class SlimPruner(Pruner): +class L2FilterPruner(RankFilterPruner): """ - A structured pruning algorithm that prunes channels by pruning the weights of BN layers. 
- Zhuang Liu, Jianguo Li, Zhiqiang Shen, Gao Huang, Shoumeng Yan and Changshui Zhang - "Learning Efficient Convolutional Networks through Network Slimming", 2017 ICCV - https://arxiv.org/pdf/1708.06519.pdf + A structured pruning algorithm that prunes the filters with the + smallest L2 norm of the absolute kernel weights are masked. """ def __init__(self, model, config_list): """ Parameters ---------- + model : torch.nn.module + Model to be pruned config_list : list support key for each list item: - sparsity: percentage of convolutional filters to be pruned. """ super().__init__(model, config_list) - self.mask_calculated_ops = set() - weight_list = [] - if len(config_list) > 1: - logger.warning('Slim pruner only supports 1 configuration') - config = config_list[0] - for (layer, config) in self.detect_modules_to_compress(): - assert layer.type == 'BatchNorm2d', 'SlimPruner only supports 2d batch normalization layer pruning' - weight_list.append(layer.module.weight.data.abs().clone()) - all_bn_weights = torch.cat(weight_list) - k = int(all_bn_weights.shape[0] * config['sparsity']) - self.global_threshold = torch.topk(all_bn_weights.view(-1), k, largest=False)[0].max() - def calc_mask(self, layer, config): + def _get_mask(self, base_mask, weight, num_prune): """ Calculate the mask of given layer. - Scale factors with the smallest absolute value in the BN layer are masked. + Filters with the smallest L2 norm of the absolute kernel weights are masked. Parameters ---------- - layer : LayerInfo - the layer to instrument the compression operation - config : dict - layer's pruning config + base_mask : torch.Tensor + The basic mask with the same shape of weight, all item in the basic mask is 1. 
+ weight : torch.Tensor + Layer's weight + num_prune : int + Num of filters to prune Returns ------- torch.Tensor - mask of the layer's weight + Mask of the layer's weight """ - - weight = layer.module.weight.data - op_name = layer.name - op_type = layer.type - assert op_type == 'BatchNorm2d', 'SlimPruner only supports 2d batch normalization layer pruning' - if op_name in self.mask_calculated_ops: - assert op_name in self.mask_dict - return self.mask_dict.get(op_name) - mask = torch.ones(weight.size()).type_as(weight) - try: - w_abs = weight.abs() - mask = torch.gt(w_abs, self.global_threshold).type_as(weight) - finally: - self.mask_dict.update({layer.name: mask}) - self.mask_calculated_ops.add(layer.name) + filters = weight.shape[0] + w = weight.view(filters, -1) + w_l2_norm = torch.sqrt((w ** 2).sum(dim=1)) + threshold = torch.topk(w_l2_norm.view(-1), num_prune, largest=False)[0].max() + mask = torch.gt(w_l2_norm, threshold)[:, None, None, None].expand_as(weight).type_as(weight) return mask + + +class FPGMPruner(RankFilterPruner): + """ + A filter pruner via geometric median. + "Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration", + https://arxiv.org/pdf/1811.00250.pdf + """ + + def __init__(self, model, config_list): + """ + Parameters + ---------- + model : pytorch model + the model user wants to compress + config_list: list + support key for each list item: + - sparsity: percentage of convolutional filters to be pruned. + """ + super().__init__(model, config_list) + + def _get_mask(self, base_mask, weight, num_prune): + """ + Calculate the mask of given layer. + Filters with the smallest sum of its absolute kernel weights are masked. + Parameters + ---------- + base_mask : torch.Tensor + The basic mask with the same shape of weight, all item in the basic mask is 1. 
+ weight : torch.Tensor + Layer's weight + num_prune : int + Num of filters to prune + Returns + ------- + torch.Tensor + Mask of the layer's weight + """ + min_gm_idx = self._get_min_gm_kernel_idx(weight, num_prune) + for idx in min_gm_idx: + base_mask[idx] = 0. + return base_mask + + def _get_min_gm_kernel_idx(self, weight, n): + assert len(weight.size()) in [3, 4] + + dist_list = [] + for out_i in range(weight.size(0)): + dist_sum = self._get_distance_sum(weight, out_i) + dist_list.append((dist_sum, out_i)) + min_gm_kernels = sorted(dist_list, key=lambda x: x[0])[:n] + return [x[1] for x in min_gm_kernels] + + def _get_distance_sum(self, weight, out_idx): + """ + Calculate the total distance between a specified filter (by out_idex and in_idx) and + all other filters. + Optimized verision of following naive implementation: + def _get_distance_sum(self, weight, in_idx, out_idx): + w = weight.view(-1, weight.size(-2), weight.size(-1)) + dist_sum = 0. + for k in w: + dist_sum += torch.dist(k, weight[in_idx, out_idx], p=2) + return dist_sum + Parameters + ---------- + weight: Tensor + convolutional filter weight + out_idx: int + output channel index of specified filter, this method calculates the total distance + between this specified filter and all other filters. 
+ Returns + ------- + float32 + The total distance + """ + logger.debug('weight size: %s', weight.size()) + assert len(weight.size()) in [3, 4], 'unsupported weight shape' + + w = weight.view(weight.size(0), -1) + anchor_w = w[out_idx].unsqueeze(0).expand(w.size(0), w.size(1)) + x = w - anchor_w + x = (x * x).sum(-1) + x = torch.sqrt(x) + return x.sum() + + def update_epoch(self, epoch): + self.mask_calculated_ops = set() diff --git a/src/sdk/pynni/tests/test_compressor.py b/src/sdk/pynni/tests/test_compressor.py index c803488b2e..0632858cec 100644 --- a/src/sdk/pynni/tests/test_compressor.py +++ b/src/sdk/pynni/tests/test_compressor.py @@ -58,8 +58,9 @@ def test_tf2_func(*args): return test_tf2_func + # for fpgm filter pruner test -w = np.array([[[[i+1]*3]*3]*5 for i in range(10)]) +w = np.array([[[[i + 1] * 3] * 3] * 5 for i in range(10)]) class CompressorTestCase(TestCase): @@ -69,19 +70,19 @@ def test_torch_quantizer_modules_detection(self): config_list = [{ 'quant_types': ['weight'], 'quant_bits': 8, - 'op_types':['Conv2d', 'Linear'] + 'op_types': ['Conv2d', 'Linear'] }, { 'quant_types': ['output'], 'quant_bits': 8, 'quant_start_step': 0, - 'op_types':['ReLU'] + 'op_types': ['ReLU'] }] model.relu = torch.nn.ReLU() quantizer = torch_compressor.QAT_Quantizer(model, config_list) quantizer.compress() modules_to_compress = quantizer.get_modules_to_compress() - modules_to_compress_name = [ t[0].name for t in modules_to_compress] + modules_to_compress_name = [t[0].name for t in modules_to_compress] assert "conv1" in modules_to_compress_name assert "conv2" in modules_to_compress_name assert "fc1" in modules_to_compress_name @@ -179,7 +180,8 @@ def test_torch_l1filter_pruner(self): w = np.array([np.zeros((3, 3, 3)), np.ones((3, 3, 3)), np.ones((3, 3, 3)) * 2, np.ones((3, 3, 3)) * 3, np.ones((3, 3, 3)) * 4]) model = TorchModel() - config_list = [{'sparsity': 0.2, 'op_names': ['conv1']}, {'sparsity': 0.6, 'op_names': ['conv2']}] + config_list = [{'sparsity': 0.2, 
'op_types': ['Conv2d'], 'op_names': ['conv1']}, + {'sparsity': 0.6, 'op_types': ['Conv2d'], 'op_names': ['conv2']}] pruner = torch_compressor.L1FilterPruner(model, config_list) model.conv1.weight.data = torch.tensor(w).float() @@ -236,12 +238,12 @@ def test_torch_QAT_quantizer(self): config_list = [{ 'quant_types': ['weight'], 'quant_bits': 8, - 'op_types':['Conv2d', 'Linear'] + 'op_types': ['Conv2d', 'Linear'] }, { 'quant_types': ['output'], 'quant_bits': 8, 'quant_start_step': 0, - 'op_types':['ReLU'] + 'op_types': ['ReLU'] }] model.relu = torch.nn.ReLU() quantizer = torch_compressor.QAT_Quantizer(model, config_list) @@ -253,7 +255,7 @@ def test_torch_QAT_quantizer(self): quantize_weight = quantizer.quantize_weight(weight, config_list[0], model.conv2) assert math.isclose(model.conv2.scale, 5 / 255, abs_tol=eps) assert model.conv2.zero_point == 0 - # range including 0 + # range including 0 weight = torch.tensor([[-1, 2], [3, 5]]).float() quantize_weight = quantizer.quantize_weight(weight, config_list[0], model.conv2) assert math.isclose(model.conv2.scale, 6 / 255, abs_tol=eps) @@ -271,5 +273,6 @@ def test_torch_QAT_quantizer(self): assert math.isclose(model.relu.tracked_min_biased, 0.002, abs_tol=eps) assert math.isclose(model.relu.tracked_max_biased, 0.00998, abs_tol=eps) + if __name__ == '__main__': main() From 659480f2ed3a853611788bc06dd0e38b910f6619 Mon Sep 17 00:00:00 2001 From: Yan Ni Date: Thu, 12 Dec 2019 10:35:48 +0800 Subject: [PATCH 4/4] Fix doc build warning (#1799) * fix doc build warnings * update docstring guide * fix doc build warning #2 * remove typing.Dict * update * fix dead link * remove deprecated docs * fix missing link warning * fix link issue after merge * fix docstring indentation warning * remove trial.py * revert commit for deadlink of outdated docs * fix pylint error --- docs/en_US/AdvancedFeature/AdvancedNas.md | 91 ------- .../AdvancedFeature/GeneralNasInterfaces.md | 234 ------------------ .../CommunitySharings/community_sharings.rst 
| 1 + docs/en_US/Compressor/AutoCompression.md | 2 +- docs/en_US/NAS/NasInterface.md | 4 +- docs/en_US/NAS/Overview.md | 2 - docs/en_US/Release.md | 6 +- docs/en_US/TrialExample/RocksdbExamples.md | 14 +- docs/en_US/Tuner/BuiltinTuner.md | 2 +- docs/en_US/Tuner/CustomizeAdvisor.md | 2 +- docs/en_US/Tutorial/Contributing.md | 2 + docs/en_US/examples.rst | 1 + docs/en_US/feature_engineering.rst | 3 +- docs/en_US/nas.rst | 4 +- docs/en_US/reference.rst | 1 + docs/en_US/training_services.rst | 3 +- src/sdk/pynni/nni/batch_tuner/batch_tuner.py | 16 +- src/sdk/pynni/nni/msg_dispatcher_base.py | 30 ++- 18 files changed, 52 insertions(+), 366 deletions(-) delete mode 100644 docs/en_US/AdvancedFeature/AdvancedNas.md delete mode 100644 docs/en_US/AdvancedFeature/GeneralNasInterfaces.md diff --git a/docs/en_US/AdvancedFeature/AdvancedNas.md b/docs/en_US/AdvancedFeature/AdvancedNas.md deleted file mode 100644 index 99d0d9c213..0000000000 --- a/docs/en_US/AdvancedFeature/AdvancedNas.md +++ /dev/null @@ -1,91 +0,0 @@ -# Tutorial for Advanced Neural Architecture Search -Currently many of the NAS algorithms leverage the technique of **weight sharing** among trials to accelerate its training process. For example, [ENAS][1] delivers 1000x effiency with '_parameter sharing between child models_', compared with the previous [NASNet][2] algorithm. Other NAS algorithms such as [DARTS][3], [Network Morphism][4], and [Evolution][5] is also leveraging, or has the potential to leverage weight sharing. - -This is a tutorial on how to enable weight sharing in NNI. - -## Weight Sharing among trials -Currently we recommend sharing weights through NFS (Network File System), which supports sharing files across machines, and is light-weighted, (relatively) efficient. We also welcome contributions from the community on more efficient techniques. - -### Weight Sharing through NFS file -With the NFS setup (see below), trial code can share model weight through loading & saving files. 
Here we recommend that user feed the tuner with the storage path: - -```yaml -tuner: - codeDir: path/to/customer_tuner - classFileName: customer_tuner.py - className: CustomerTuner - classArgs: - ... - save_dir_root: /nfs/storage/path/ -``` - -And let tuner decide where to save & load weights and feed the paths to trials through `nni.get_next_parameters()`: - -drawing - - For example, in tensorflow: -```python -# save models -saver = tf.train.Saver() -saver.save(sess, os.path.join(params['save_path'], 'model.ckpt')) -# load models -tf.init_from_checkpoint(params['restore_path']) -``` -where `'save_path'` and `'restore_path'` in hyper-parameter can be managed by the tuner. - -### NFS Setup -NFS follows the Client-Server Architecture, with an NFS server providing physical storage, trials on the remote machine with an NFS client can read/write those files in the same way that they access local files. - -#### NFS Server -An NFS server can be any machine as long as it can provide enough physical storage, and network connection with **remote machine** for NNI trials. Usually you can choose one of the remote machine as NFS Server. - -On Ubuntu, install NFS server through `apt-get`: -```bash -sudo apt-get install nfs-kernel-server -``` -Suppose `/tmp/nni/shared` is used as the physical storage, then run: -```bash -mkdir -p /tmp/nni/shared -sudo echo "/tmp/nni/shared *(rw,sync,no_subtree_check,no_root_squash)" >> /etc/exports -sudo service nfs-kernel-server restart -``` -You can check if the above directory is successfully exported by NFS using `sudo showmount -e localhost` - -#### NFS Client -For a trial on remote machine able to access shared files with NFS, an NFS client needs to be installed. 
For example, on Ubuntu: -```bash -sudo apt-get install nfs-common -``` -Then create & mount the mounted directory of shared files: -```bash -mkdir -p /mnt/nfs/nni/ -sudo mount -t nfs 10.10.10.10:/tmp/nni/shared /mnt/nfs/nni -``` -where `10.10.10.10` should be replaced by the real IP of NFS server machine in practice. - -## Asynchronous Dispatcher Mode for trial dependency control -The feature of weight sharing enables trials from different machines, in which most of the time **read after write** consistency must be assured. After all, the child model should not load parent model before parent trial finishes training. To deal with this, users can enable **asynchronous dispatcher mode** with `multiThread: true` in `config.yml` in NNI, where the dispatcher assign a tuner thread each time a `NEW_TRIAL` request comes in, and the tuner thread can decide when to submit a new trial by blocking and unblocking the thread itself. For example: -```python - def generate_parameters(self, parameter_id): - self.thread_lock.acquire() - indiv = # configuration for a new trial - self.events[parameter_id] = threading.Event() - self.thread_lock.release() - if indiv.parent_id is not None: - self.events[indiv.parent_id].wait() - - def receive_trial_result(self, parameter_id, parameters, reward): - self.thread_lock.acquire() - # code for processing trial results - self.thread_lock.release() - self.events[parameter_id].set() -``` - -## Examples -For details, please refer to this [simple weight sharing example](https://github.com/Microsoft/nni/tree/master/test/async_sharing_test). We also provided a [practice example](https://github.com/Microsoft/nni/tree/master/examples/trials/weight_sharing/ga_squad) for reading comprehension, based on previous [ga_squad](https://github.com/Microsoft/nni/tree/master/examples/trials/ga_squad) example. 
- -[1]: https://arxiv.org/abs/1802.03268 -[2]: https://arxiv.org/abs/1707.07012 -[3]: https://arxiv.org/abs/1806.09055 -[4]: https://arxiv.org/abs/1806.10282 -[5]: https://arxiv.org/abs/1703.01041 diff --git a/docs/en_US/AdvancedFeature/GeneralNasInterfaces.md b/docs/en_US/AdvancedFeature/GeneralNasInterfaces.md deleted file mode 100644 index f3850e6188..0000000000 --- a/docs/en_US/AdvancedFeature/GeneralNasInterfaces.md +++ /dev/null @@ -1,234 +0,0 @@ -# NNI Programming Interface for Neural Architecture Search (NAS) - -_*This is an **experimental feature**. Currently, we only implemented the general NAS programming interface. Weight sharing will be supported in the following releases._ - -Automatic neural architecture search is taking an increasingly important role on finding better models. Recent research works have proved the feasibility of automatic NAS, and also found some models that could beat manually designed and tuned models. Some of representative works are [NASNet][2], [ENAS][1], [DARTS][3], [Network Morphism][4], and [Evolution][5]. There are new innovations keeping emerging. However, it takes great efforts to implement those algorithms, and it is hard to reuse code base of one algorithm for implementing another. - -To facilitate NAS innovations (e.g., design/implement new NAS models, compare different NAS models side-by-side), an easy-to-use and flexible programming interface is crucial. - - - -## Programming interface - - A new programming interface for designing and searching for a model is often demanded in two scenarios. 1) When designing a neural network, the designer may have multiple choices for a layer, sub-model, or connection, and not sure which one or a combination performs the best. It would be appealing to have an easy way to express the candidate layers/sub-models they want to try. 2) For the researchers who are working on automatic NAS, they want to have an unified way to express the search space of neural architectures. 
And making unchanged trial code adapted to different searching algorithms. - - We designed a simple and flexible programming interface based on [NNI annotation](../Tutorial/AnnotationSpec.md). It is elaborated through examples below. - -### Example: choose an operator for a layer - -When designing the following model there might be several choices in the fourth layer that may make this model perform well. In the script of this model, we can use annotation for the fourth layer as shown in the figure. In this annotation, there are five fields in total: - -![](../../img/example_layerchoice.png) - -* __layer_choice__: It is a list of function calls, each function should have defined in user's script or imported libraries. The input arguments of the function should follow the format: `def XXX(inputs, arg2, arg3, ...)`, where inputs is a list with two elements. One is the list of `fixed_inputs`, and the other is a list of the chosen inputs from `optional_inputs`. `conv` and `pool` in the figure are examples of function definition. For the function calls in this list, no need to write the first argument (i.e., input). Note that only one of the function calls are chosen for this layer. -* __fixed_inputs__: It is a list of variables, the variable could be an output tensor from a previous layer. The variable could be `layer_output` of another `nni.mutable_layer` before this layer, or other python variables before this layer. All the variables in this list will be fed into the chosen function in `layer_choice` (as the first element of the input list). -* __optional_inputs__: It is a list of variables, the variable could be an output tensor from a previous layer. The variable could be `layer_output` of another `nni.mutable_layer` before this layer, or other python variables before this layer. Only `optional_input_size` variables will be fed into the chosen function in `layer_choice` (as the second element of the input list). 
-* __optional_input_size__: It indicates how many inputs are chosen from `input_candidates`. It could be a number or a range. A range [1,3] means it chooses 1, 2, or 3 inputs. -* __layer_output__: The name of the output(s) of this layer, in this case it represents the return of the function call in `layer_choice`. This will be a variable name that can be used in the following python code or `nni.mutable_layer`. - -There are two ways to write annotation for this example. For the upper one, input of the function calls is `[[],[out3]]`. For the bottom one, input is `[[out3],[]]`. - -__Debugging__: We provided an `nnictl trial codegen` command to help debugging your code of NAS programming on NNI. If your trial with trial_id `XXX` in your experiment `YYY` is failed, you could run `nnictl trial codegen YYY --trial_id XXX` to generate an executable code for this trial under your current directory. With this code, you can directly run the trial command without NNI to check why this trial is failed. Basically, this command is to compile your trial code and replace the NNI NAS code with the real chosen layers and inputs. - -### Example: choose input connections for a layer - -Designing connections of layers is critical for making a high performance model. With our provided interface, users could annotate which connections a layer takes (as inputs). They could choose several ones from a set of connections. Below is an example which chooses two inputs from three candidate inputs for `concat`. Here `concat` always takes the output of its previous layer using `fixed_inputs`. - -![](../../img/example_connectchoice.png) - -### Example: choose both operators and connections - -In this example, we choose one from the three operators and choose two connections for it. As there are multiple variables in inputs, we call `concat` at the beginning of the functions. 
- -![](../../img/example_combined.png) - -### Example: [ENAS][1] macro search space - -To illustrate the convenience of the programming interface, we use the interface to implement the trial code of "ENAS + macro search space". The left figure is the macro search space in ENAS paper. - -![](../../img/example_enas.png) - -## Unified NAS search space specification - -After finishing the trial code through the annotation above, users have implicitly specified the search space of neural architectures in the code. Based on the code, NNI will automatically generate a search space file which could be fed into tuning algorithms. This search space file follows the following JSON format. - -```javascript -{ - "mutable_1": { - "_type": "mutable_layer", - "_value": { - "layer_1": { - "layer_choice": ["conv(ch=128)", "pool", "identity"], - "optional_inputs": ["out1", "out2", "out3"], - "optional_input_size": 2 - }, - "layer_2": { - ... - } - } - } -} -``` - -Accordingly, a specified neural architecture (generated by tuning algorithm) is expressed as follows: - -```javascript -{ - "mutable_1": { - "layer_1": { - "chosen_layer": "pool", - "chosen_inputs": ["out1", "out3"] - }, - "layer_2": { - ... - } - } -} -``` - -With the specification of the format of search space and architecture (choice) expression, users are free to implement various (general) tuning algorithms for neural architecture search on NNI. One future work is to provide a general NAS algorithm. - -## Support of One-Shot NAS - -One-Shot NAS is a popular approach to find good neural architecture within a limited time and resource budget. Basically, it builds a full graph based on the search space, and uses gradient descent to at last find the best subgraph. There are different training approaches, such as [training subgraphs (per mini-batch)][1], [training full graph through dropout][6], [training with architecture weights (regularization)][3]. - -NNI has supported the general NAS as demonstrated above. 
From users' point of view, One-Shot NAS and NAS have the same search space specification, thus, they could share the same programming interface as demonstrated above, just different training modes. NNI provides four training modes: - -**\*classic_mode\***: this mode is described [above](#ProgInterface), in this mode, each subgraph runs as a trial job. To use this mode, you should enable NNI annotation and specify a tuner for nas in experiment config file. [Here](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nas) is an example to show how to write trial code and the config file. And [here](https://github.com/microsoft/nni/tree/master/examples/tuners/random_nas_tuner) is a simple tuner for nas. - -**\*enas_mode\***: following the training approach in [enas paper][1]. It builds the full graph based on neural architrecture search space, and only activate one subgraph that generated by the controller for each mini-batch. [Detailed Description](#ENASMode). (currently only supported on tensorflow). - -To use enas_mode, you should add one more field in the `trial` config as shown below. -```diff -trial: - command: your command to run the trial - codeDir: the directory where the trial's code is located - gpuNum: the number of GPUs that one trial job needs -+ #choice: classic_mode, enas_mode, oneshot_mode -+ nasMode: enas_mode -``` -Similar to classic_mode, in enas_mode you need to specify a tuner for nas, as it also needs to receive subgraphs from tuner (or controller using the terminology in the paper). Since this trial job needs to receive multiple subgraphs from tuner, each one for a mini-batch, two lines need to be added to the trial code to receive the next subgraph (i.e., `nni.training_update`) and report the result of the current subgraph. 
Below is an example: -```python -for _ in range(num): - # here receives and enables a new subgraph - """@nni.training_update(tf=tf, session=self.session)""" - loss, _ = self.session.run([loss_op, train_op]) - # report the loss of this mini-batch - """@nni.report_final_result(loss)""" -``` -Here, `nni.training_update` is to do some update on the full graph. In enas_mode, the update means receiving a subgraph and enabling it on the next mini-batch. While in darts_mode, the update means training the architecture weights (details in darts_mode). In enas_mode, you need to pass the imported tensorflow package to `tf` and the session to `session`. - -**\*oneshot_mode\***: following the training approach in [this paper][6]. Different from enas_mode which trains the full graph by training large numbers of subgraphs, in oneshot_mode the full graph is built and dropout is added to candidate inputs and also added to candidate ops' outputs. Then this full graph is trained like other DL models. [Detailed Description](#OneshotMode). (currently only supported on tensorflow). - -To use oneshot_mode, you should add one more field in the `trial` config as shown below. In this mode, though there is no need to use tuner, you still need to specify a tuner (any tuner) in the config file for now. Also, no need to add `nni.training_update` in this mode, because no special processing (or update) is needed during training. -```diff -trial: - command: your command to run the trial - codeDir: the directory where the trial's code is located - gpuNum: the number of GPUs that one trial job needs -+ #choice: classic_mode, enas_mode, oneshot_mode -+ nasMode: oneshot_mode -``` - -**\*darts_mode\***: following the training approach in [this paper][3]. It is similar to oneshot_mode. There are two differences, one is that darts_mode only add architecture weights to the outputs of candidate ops, the other is that it trains model weights and architecture weights in an interleaved manner. 
[Detailed Description](#DartsMode). - -To use darts_mode, you should add one more field in the `trial` config as shown below. In this mode, though there is no need to use tuner, you still need to specify a tuner (any tuner) in the config file for now. -```diff -trial: - command: your command to run the trial - codeDir: the directory where the trial's code is located - gpuNum: the number of GPUs that one trial job needs -+ #choice: classic_mode, enas_mode, oneshot_mode -+ nasMode: darts_mode -``` - -When using darts_mode, you need to call `nni.training_update` as shown below when architecture weights should be updated. Updating architecture weights need `loss` for updating the weights as well as the training data (i.e., `feed_dict`) for it. -```python -for _ in range(num): - # here trains the architecture weights - """@nni.training_update(tf=tf, session=self.session, loss=loss, feed_dict=feed_dict)""" - loss, _ = self.session.run([loss_op, train_op]) -``` - -**Note:** for enas_mode, oneshot_mode, and darts_mode, NNI only works on the training phase. They also have their own inference phase which is not handled by NNI. For enas_mode, the inference phase is to generate new subgraphs through the controller. For oneshot_mode, the inference phase is sampling new subgraphs randomly and choosing good ones. For darts_mode, the inference phase is pruning a proportion of candidates ops based on architecture weights. - - - -### enas_mode - -In enas_mode, the compiled trial code builds the full graph (rather than subgraph), it receives a chosen architecture and training this architecture on the full graph for a mini-batch, then request another chosen architecture. It is supported by [NNI multi-phase](./MultiPhase.md). 
- -Specifically, for trials using tensorflow, we create and use tensorflow variable as signals, and tensorflow conditional functions to control the search space (full-graph) to be more flexible, which means it can be changed into different sub-graphs (multiple times) depending on these signals. [Here](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nas/enas_mode) is an example for enas_mode. - - - -### oneshot_mode - -Below is the figure to show where dropout is added to the full graph for one layer in `nni.mutable_layers`, input 1-k are candidate inputs, the four ops are candidate ops. - -![](../../img/oneshot_mode.png) - -As suggested in the [paper][6], a dropout method is implemented to the inputs for every layer. The dropout rate is set to r^(1/k), where 0 < r < 1 is a hyper-parameter of the model (default to be 0.01) and k is number of optional inputs for a specific layer. The higher the fan-in, the more likely each possible input is to be dropped out. However, the probability of dropping out all optional_inputs of a layer is kept constant regardless of its fan-in. Suppose r = 0.05. If a layer has k = 2 optional_inputs then each one will independently be dropped out with probability 0.051/2 ≈ 0.22 and will be retained with probability 0.78. If a layer has k = 7 optional_inputs then each one will independently be dropped out with probability 0.051/7 ≈ 0.65 and will be retained with probability 0.35. In both cases, the probability of dropping out all of the layer's optional_inputs is 5%. The outputs of candidate ops are dropped out through the same way. [Here](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nas/oneshot_mode) is an example for oneshot_mode. - - - -### darts_mode - -Below is the figure to show where architecture weights are added to the full graph for one layer in `nni.mutable_layers`, output of each candidate op is multiplied by a weight which is called architecture weight. 
- -![](../../img/darts_mode.png) - -In `nni.training_update`, tensorflow MomentumOptimizer is used to train the architecture weights based on the pass `loss` and `feed_dict`. [Here](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nas/darts_mode) is an example for darts_mode. - -### [__TODO__] Multiple trial jobs for One-Shot NAS - -One-Shot NAS usually has only one trial job with the full graph. However, running multiple such trial jobs leads to benefits. For example, in enas_mode multiple trial jobs could share the weights of the full graph to speedup the model training (or converge). Some One-Shot approaches are not stable, running multiple trial jobs increase the possibility of finding better models. - -NNI natively supports running multiple such trial jobs. The figure below shows how multiple trial jobs run on NNI. - -![](../../img/one-shot_training.png) - -============================================================= - -## System design of NAS on NNI - -### Basic flow of experiment execution - -NNI's annotation compiler transforms the annotated trial code to the code that could receive architecture choice and build the corresponding model (i.e., graph). The NAS search space can be seen as a full graph (here, full graph means enabling all the provided operators and connections to build a graph), the architecture chosen by the tuning algorithm is a subgraph in it. By default, the compiled trial code only builds and executes the subgraph. - -![](../../img/nas_on_nni.png) - -The above figure shows how the trial code runs on NNI. `nnictl` processes user trial code to generate a search space file and compiled trial code. The former is fed to tuner, and the latter is used to run trials. - -[Simple example of NAS on NNI](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nas). - -### [__TODO__] Weight sharing - -Sharing weights among chosen architectures (i.e., trials) could speedup model search. 
For example, properly inheriting weights of completed trials could speedup the converge of new trials. One-Shot NAS (e.g., ENAS, Darts) is more aggressive, the training of different architectures (i.e., subgraphs) shares the same copy of the weights in full graph. - -![](../../img/nas_weight_share.png) - -We believe weight sharing (transferring) plays a key role on speeding up NAS, while finding efficient ways of sharing weights is still a hot research topic. We provide a key-value store for users to store and load weights. Tuners and Trials use a provided KV client lib to access the storage. - -Example of weight sharing on NNI. - -## General tuning algorithms for NAS - -Like hyperparameter tuning, a relatively general algorithm for NAS is required. The general programming interface makes this task easier to some extent. We have an [RL tuner based on PPO algorithm](https://github.com/microsoft/nni/tree/master/src/sdk/pynni/nni/ppo_tuner) for NAS. We expect efforts from community to design and implement better NAS algorithms. - -## [__TODO__] Export best neural architecture and code - -After the NNI experiment is done, users could run `nnictl experiment export --code` to export the trial code with the best neural architecture. - -## Conclusion and Future work - -There could be different NAS algorithms and execution modes, but they could be supported with the same programming interface as demonstrated above. - -There are many interesting research topics in this area, both system and machine learning. 
- -[1]: https://arxiv.org/abs/1802.03268 -[2]: https://arxiv.org/abs/1707.07012 -[3]: https://arxiv.org/abs/1806.09055 -[4]: https://arxiv.org/abs/1806.10282 -[5]: https://arxiv.org/abs/1703.01041 -[6]: http://proceedings.mlr.press/v80/bender18a/bender18a.pdf diff --git a/docs/en_US/CommunitySharings/community_sharings.rst b/docs/en_US/CommunitySharings/community_sharings.rst index 2f6edf142c..6938000a9b 100644 --- a/docs/en_US/CommunitySharings/community_sharings.rst +++ b/docs/en_US/CommunitySharings/community_sharings.rst @@ -12,3 +12,4 @@ In addtion to the official tutorilas and examples, we encourage community contri Neural Architecture Search Comparison Hyper-parameter Tuning Algorithm Comparsion Parallelizing Optimization for TPE + Automatically tune systems with NNI diff --git a/docs/en_US/Compressor/AutoCompression.md b/docs/en_US/Compressor/AutoCompression.md index 013240167a..463c23d401 100644 --- a/docs/en_US/Compressor/AutoCompression.md +++ b/docs/en_US/Compressor/AutoCompression.md @@ -84,7 +84,7 @@ config_list_agp = [{'initial_sparsity': 0, 'final_sparsity': conv0_sparsity, {'initial_sparsity': 0, 'final_sparsity': conv1_sparsity, 'start_epoch': 0, 'end_epoch': 3, 'frequency': 1,'op_name': 'conv1' },] -PRUNERS = {'level':LevelPruner(model, config_list_level),'agp':AGP_Pruner(model, config_list_agp)} +PRUNERS = {'level':LevelPruner(model, config_list_level), 'agp':AGP_Pruner(model, config_list_agp)} pruner = PRUNERS(params['prune_method']['_name']) pruner.compress() ... # fine tuning diff --git a/docs/en_US/NAS/NasInterface.md b/docs/en_US/NAS/NasInterface.md index 655e5ed834..76dc69087c 100644 --- a/docs/en_US/NAS/NasInterface.md +++ b/docs/en_US/NAS/NasInterface.md @@ -2,8 +2,6 @@ We are trying to support various NAS algorithms with unified programming interface, and it's still in experimental stage. It means the current programing interface might be updated in future. 
-*previous [NAS annotation](../AdvancedFeature/GeneralNasInterfaces.md) interface will be deprecated soon.* - ## Programming interface for user model The programming interface of designing and searching a model is often demanded in two scenarios. @@ -100,7 +98,7 @@ trainer.export(file='./chosen_arch') Different trainers could have different input arguments depending on their algorithms. Please refer to [each trainer's code](https://github.com/microsoft/nni/tree/master/src/sdk/pynni/nni/nas/pytorch) for detailed arguments. After training, users could export the best one of the found models through `trainer.export()`. No need to start an NNI experiment through `nnictl`. -The supported trainers can be found [here](./Overview.md#supported-one-shot-nas-algorithms). A very simple example using NNI NAS API can be found [here](https://github.com/microsoft/nni/tree/master/examples/nas/simple/train.py). +The supported trainers can be found [here](Overview.md#supported-one-shot-nas-algorithms). A very simple example using NNI NAS API can be found [here](https://github.com/microsoft/nni/tree/master/examples/nas/simple/train.py). ### Classic distributed search diff --git a/docs/en_US/NAS/Overview.md b/docs/en_US/NAS/Overview.md index e43105c515..3426673669 100644 --- a/docs/en_US/NAS/Overview.md +++ b/docs/en_US/NAS/Overview.md @@ -97,8 +97,6 @@ python3 retrain.py --arc-checkpoint ../pdarts/checkpoints/epoch_2.json NOTE, we are trying to support various NAS algorithms with unified programming interface, and it's in very experimental stage. It means the current programing interface may be updated in future. -*previous [NAS annotation](../AdvancedFeature/GeneralNasInterfaces.md) interface will be deprecated soon.* - ### Programming interface The programming interface of designing and searching a model is often demanded in two scenarios. 
diff --git a/docs/en_US/Release.md b/docs/en_US/Release.md index 1e6f0d8dbc..811a030b1a 100644 --- a/docs/en_US/Release.md +++ b/docs/en_US/Release.md @@ -63,7 +63,7 @@ - Support Auto-Feature generator & selection -Issue#877 -PR #1387 + Provide auto feature interface + Tuner based on beam search - + [Add Pakdd example](./examples/trials/auto-feature-engineering/README.md) + + [Add Pakdd example](https://github.com/microsoft/nni/tree/master/examples/trials/auto-feature-engineering) - Add a parallel algorithm to improve the performance of TPE with large concurrency. -PR #1052 - Support multiphase for hyperband -PR #1257 @@ -91,9 +91,9 @@ * Documentation - Update the docs structure -Issue #1231 - - [Multi phase document improvement](./docs/en_US/AdvancedFeature/MultiPhase.md) -Issue #1233 -PR #1242 + - [Multi phase document improvement](AdvancedFeature/MultiPhase.md) -Issue #1233 -PR #1242 + Add configuration example - - [WebUI description improvement](./docs/en_US/Tutorial/WebUI.md) -PR #1419 + - [WebUI description improvement](Tutorial/WebUI.md) -PR #1419 ### Bug fix diff --git a/docs/en_US/TrialExample/RocksdbExamples.md b/docs/en_US/TrialExample/RocksdbExamples.md index 9423c1e152..6e7d36cc32 100644 --- a/docs/en_US/TrialExample/RocksdbExamples.md +++ b/docs/en_US/TrialExample/RocksdbExamples.md @@ -8,9 +8,9 @@ The performance of RocksDB is highly contingent on its tuning. However, because This example illustrates how to use NNI to search the best configuration of RocksDB for a `fillrandom` benchmark supported by a benchmark tool `db_bench`, which is an official benchmark tool provided by RocksDB itself. Therefore, before running this example, please make sure NNI is installed and [`db_bench`](https://github.com/facebook/rocksdb/wiki/Benchmarking-tools) is in your `PATH`. 
Please refer to [here](../Tutorial/QuickStart.md) for detailed information about installation and preparing of NNI environment, and [here](https://github.com/facebook/rocksdb/blob/master/INSTALL.md) for compiling RocksDB as well as `db_bench`. -We also provide a simple script [`db_bench_installation.sh`](../../../examples/trials/systems/rocksdb-fillrandom/db_bench_installation.sh) helping to compile and install `db_bench` as well as its dependencies on Ubuntu. Installing RocksDB on other systems can follow the same procedure. +We also provide a simple script [`db_bench_installation.sh`](https://github.com/microsoft/nni/tree/master/examples/trials/systems/rocksdb-fillrandom/db_bench_installation.sh) helping to compile and install `db_bench` as well as its dependencies on Ubuntu. Installing RocksDB on other systems can follow the same procedure. -*code directory: [`example/trials/systems/rocksdb-fillrandom`](../../../examples/trials/systems/rocksdb-fillrandom)* +*code directory: [`example/trials/systems/rocksdb-fillrandom`](https://github.com/microsoft/nni/tree/master/examples/trials/systems/rocksdb-fillrandom)* ## Experiment setup @@ -39,7 +39,7 @@ In this example, the search space is specified by a `search_space.json` file as } ``` -*code directory: [`example/trials/systems/rocksdb-fillrandom/search_space.json`](../../../examples/trials/systems/rocksdb-fillrandom/search_space.json)* +*code directory: [`example/trials/systems/rocksdb-fillrandom/search_space.json`](https://github.com/microsoft/nni/tree/master/examples/trials/systems/rocksdb-fillrandom/search_space.json)* ### Benchmark code @@ -48,7 +48,7 @@ Benchmark code should receive a configuration from NNI manager, and report the c * Use `nni.get_next_parameter()` to get next system configuration. * Use `nni.report_final_result(metric)` to report the benchmark result. 
-*code directory: [`example/trials/systems/rocksdb-fillrandom/main.py`](../../../examples/trials/systems/rocksdb-fillrandom/main.py)* +*code directory: [`examples/trials/systems/rocksdb-fillrandom/main.py`](https://github.com/microsoft/nni/tree/master/examples/trials/systems/rocksdb-fillrandom/main.py)* ### Config file @@ -56,11 +56,11 @@ One could start a NNI experiment with a config file. A config file for NNI is a Here is an example of tuning RocksDB with SMAC algorithm: -*code directory: [`example/trials/systems/rocksdb-fillrandom/config_smac.yml`](../../../examples/trials/systems/rocksdb-fillrandom/config_smac.yml)* +*code directory: [`examples/trials/systems/rocksdb-fillrandom/config_smac.yml`](https://github.com/microsoft/nni/tree/master/examples/trials/systems/rocksdb-fillrandom/config_smac.yml)* Here is an example of tuning RocksDB with TPE algorithm: -*code directory: [`example/trials/systems/rocksdb-fillrandom/config_tpe.yml`](../../../examples/trials/systems/rocksdb-fillrandom/config_tpe.yml)* +*code directory: [`examples/trials/systems/rocksdb-fillrandom/config_tpe.yml`](https://github.com/microsoft/nni/tree/master/examples/trials/systems/rocksdb-fillrandom/config_tpe.yml)* Other tuners can be easily adopted in the same way. Please refer to [here](../Tuner/BuiltinTuner.md) for more information. @@ -87,7 +87,7 @@ We ran these two examples on the same machine with following details: The detailed experiment results are shown in the below figure. Horizontal axis is sequential order of trials. Vertical axis is the metric, write OPS in this example. Blue dots represent trials for tuning RocksDB with SMAC tuner, and orange dots stand for trials for tuning RocksDB with TPE tuner. -![image](../../../examples/trials/systems/rocksdb-fillrandom/plot.png) +![image](https://github.com/microsoft/nni/raw/master/examples/trials/systems/rocksdb-fillrandom/plot.png) Following table lists the best trials and corresponding parameters and metric obtained by the two tuners.
Unsurprisingly, both of them found the same optimal configuration for `fillrandom` benchmark. diff --git a/docs/en_US/Tuner/BuiltinTuner.md b/docs/en_US/Tuner/BuiltinTuner.md index 9dd9085e4c..fc799915c7 100644 --- a/docs/en_US/Tuner/BuiltinTuner.md +++ b/docs/en_US/Tuner/BuiltinTuner.md @@ -43,7 +43,7 @@ TPE, as a black-box optimization, can be used in various scenarios and shows goo * **optimize_mode** (*maximize or minimize, optional, default = maximize*) - If 'maximize', the tuner will target to maximize metrics. If 'minimize', the tuner will target to minimize metrics. -Note: We have optimized the parallelism of TPE for large-scale trial-concurrency. For the principle of optimization or turn-on optimization, please refer to [TPE document](HyperoptTuner.md). +Note: We have optimized the parallelism of TPE for large-scale trial-concurrency. For the principle of optimization or turn-on optimization, please refer to [TPE document](./HyperoptTuner.md). **Usage example:** diff --git a/docs/en_US/Tuner/CustomizeAdvisor.md b/docs/en_US/Tuner/CustomizeAdvisor.md index aefdd959ad..c81207b758 100644 --- a/docs/en_US/Tuner/CustomizeAdvisor.md +++ b/docs/en_US/Tuner/CustomizeAdvisor.md @@ -35,4 +35,4 @@ advisor: ## Example -Here we provide an [example](../../../examples/tuners/mnist_keras_customized_advisor). +Here we provide an [example](https://github.com/microsoft/nni/tree/master/examples/tuners/mnist_keras_customized_advisor). 
diff --git a/docs/en_US/Tutorial/Contributing.md b/docs/en_US/Tutorial/Contributing.md index 8d1eb6a552..ce91b12344 100644 --- a/docs/en_US/Tutorial/Contributing.md +++ b/docs/en_US/Tutorial/Contributing.md @@ -43,6 +43,8 @@ A person looking to contribute can take up an issue by claiming it as a comment/ * For docstrings, please refer to [numpydoc docstring guide](https://numpydoc.readthedocs.io/en/latest/format.html) and [pandas docstring guide](https://python-sprints.github.io/pandas/guide/pandas_docstring.html) * For function docstring, **description**, **Parameters**, and **Returns**/**Yields** are mandatory. * For class docstring, **description**, **Attributes** are mandatory. + * For docstrings that describe a `dict`, which is commonly used in our hyper-param format description, please refer to [RiboKit : Doc Standards + - Internal Guideline on Writing Standards](https://ribokit.github.io/docs/text/) ## Documentation Our documentation is built with [sphinx](http://sphinx-doc.org/), supporting [Markdown](https://guides.github.com/features/mastering-markdown/) and [reStructuredText](http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html) format. All our documentations are placed under [docs/en_US](https://github.com/Microsoft/nni/tree/master/docs).
diff --git a/docs/en_US/examples.rst b/docs/en_US/examples.rst index b81d983929..57a1547610 100644 --- a/docs/en_US/examples.rst +++ b/docs/en_US/examples.rst @@ -10,3 +10,4 @@ Examples Scikit-learn<./TrialExample/SklearnExamples> EvolutionSQuAD<./TrialExample/SquadEvolutionExamples> GBDT<./TrialExample/GbdtExample> + RocksDB <./TrialExample/RocksdbExamples> diff --git a/docs/en_US/feature_engineering.rst b/docs/en_US/feature_engineering.rst index 588452fc57..bfbcb6f490 100644 --- a/docs/en_US/feature_engineering.rst +++ b/docs/en_US/feature_engineering.rst @@ -1,6 +1,5 @@ -################### Feature Engineering -################### +=================== We are glad to announce the alpha release for Feature Engineering toolkit on top of NNI, it's still in the experiment phase which might evolve based on usage feedback. diff --git a/docs/en_US/nas.rst b/docs/en_US/nas.rst index 92e73e8b78..2228e52d76 100644 --- a/docs/en_US/nas.rst +++ b/docs/en_US/nas.rst @@ -20,6 +20,6 @@ For details, please refer to the following tutorials: Overview NAS Interface - ENAS - DARTS + ENAS + DARTS P-DARTS diff --git a/docs/en_US/reference.rst b/docs/en_US/reference.rst index 8dd84766fb..ee300086f5 100644 --- a/docs/en_US/reference.rst +++ b/docs/en_US/reference.rst @@ -10,3 +10,4 @@ References Configuration Search Space TrainingService + Framework Library diff --git a/docs/en_US/training_services.rst b/docs/en_US/training_services.rst index db6889883b..bfc8d9a746 100644 --- a/docs/en_US/training_services.rst +++ b/docs/en_US/training_services.rst @@ -2,8 +2,9 @@ Introduction to NNI Training Services ===================================== .. 
toctree:: + Overview <./TrainingService/SupportTrainingService> Local<./TrainingService/LocalMode> Remote<./TrainingService/RemoteMachineMode> OpenPAI<./TrainingService/PaiMode> Kubeflow<./TrainingService/KubeflowMode> - FrameworkController<./TrainingService/FrameworkControllerMode> \ No newline at end of file + FrameworkController<./TrainingService/FrameworkControllerMode> diff --git a/src/sdk/pynni/nni/batch_tuner/batch_tuner.py b/src/sdk/pynni/nni/batch_tuner/batch_tuner.py index d3184c3789..4f73fce945 100644 --- a/src/sdk/pynni/nni/batch_tuner/batch_tuner.py +++ b/src/sdk/pynni/nni/batch_tuner/batch_tuner.py @@ -24,13 +24,15 @@ class BatchTuner(Tuner): Examples -------- The search space only be accepted like: - ``` - { - 'combine_params': { '_type': 'choice', - '_value': '[{...}, {...}, {...}]', - } - } - ``` + + :: + + {'combine_params': + { '_type': 'choice', + '_value': '[{...}, {...}, {...}]', + } + } + """ def __init__(self): diff --git a/src/sdk/pynni/nni/msg_dispatcher_base.py b/src/sdk/pynni/nni/msg_dispatcher_base.py index 632a83968c..e323257d7f 100644 --- a/src/sdk/pynni/nni/msg_dispatcher_base.py +++ b/src/sdk/pynni/nni/msg_dispatcher_base.py @@ -163,19 +163,23 @@ def handle_initialize(self, data): raise NotImplementedError('handle_initialize not implemented') def handle_request_trial_jobs(self, data): - """The message dispatcher is demanded to generate `data` trial jobs. - These trial jobs should be sent via `send(CommandType.NewTrialJob, json_tricks.dumps(parameter))`, - where `parameter` will be received by NNI Manager and eventually accessible to trial jobs as "next parameter". - Semantically, message dispatcher should do this `send` exactly `data` times. + """The message dispatcher is demanded to generate ``data`` trial jobs. + These trial jobs should be sent via ``send(CommandType.NewTrialJob, json_tricks.dumps(parameter))``, + where ``parameter`` will be received by NNI Manager and eventually accessible to trial jobs as "next parameter". 
+ Semantically, message dispatcher should do this ``send`` exactly ``data`` times. The JSON sent by this method should follow the format of - { - "parameter_id": 42 - "parameters": { - // this will be received by trial - }, - "parameter_source": "algorithm" // optional - } + + :: + + { + "parameter_id": 42, + "parameters": { + // this will be received by trial + }, + "parameter_source": "algorithm" // optional + } + Parameters ---------- data: int @@ -211,6 +215,7 @@ def handle_add_customized_trial(self, data): def handle_report_metric_data(self, data): """Called when metric data is reported or new parameters are requested (for multiphase). When new parameters are requested, this method should send a new parameter. + Parameters ---------- data: dict @@ -219,6 +224,7 @@ def handle_report_metric_data(self, data): `REQUEST_PARAMETER` is used to request new parameters for multiphase trial job. In this case, the dict will contain additional keys: `trial_job_id`, `parameter_index`. Refer to `msg_dispatcher.py` as an example. + Raises ------ ValueError @@ -228,6 +234,7 @@ def handle_report_metric_data(self, data): def handle_trial_end(self, data): """Called when the state of one of the trials is changed + Parameters ---------- data: dict @@ -235,5 +242,6 @@ def handle_trial_end(self, data): trial_job_id: the id generated by training service. event: the job’s state. hyper_params: the string that is sent by message dispatcher during the creation of trials. + """ raise NotImplementedError('handle_trial_end not implemented')