From 9bd26aa664cea607dad30ead1de76939e596cd7d Mon Sep 17 00:00:00 2001 From: pparke Date: Tue, 2 Apr 2024 19:40:16 -0300 Subject: [PATCH 1/9] add class fields to MultiHeadSelfAttention --- src/layers.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/layers.ts b/src/layers.ts index 94bff11..68e844a 100644 --- a/src/layers.ts +++ b/src/layers.ts @@ -119,6 +119,16 @@ export class Linear extends Module { } export class MultiHeadSelfAttention extends Module { + public Wk: Linear; + public Wq: Linear; + public Wv: Linear; + public residual_proj: Linear; + public mask: Tensor; + public att_dropout: Dropout; + public residual_dropout: Dropout; + public softmax: Softmax; + public H: number; + /** * Full transformer Layer implementation. * From 5692e7ffbc50b49de2bbb3ebf1d7ed6e663289c6 Mon Sep 17 00:00:00 2001 From: pparke Date: Tue, 2 Apr 2024 19:41:12 -0300 Subject: [PATCH 2/9] add class fields to Linear --- src/layers.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/layers.ts b/src/layers.ts index 68e844a..eaa2f86 100644 --- a/src/layers.ts +++ b/src/layers.ts @@ -89,6 +89,10 @@ export class Module implements ModuleInterface { // Standard Layers: export class Linear extends Module { + public W: Tensor; + public b: Tensor; + public has_bias: boolean; + /** * Simple linear layer, with weight matrix and optional bias. Does not contain nonlinearity. * From bbe12a4a654284bab15a36fdb4b34eb836aa2885 Mon Sep 17 00:00:00 2001 From: pparke Date: Tue, 2 Apr 2024 19:42:29 -0300 Subject: [PATCH 3/9] add mode to ModuleInterface --- src/layers.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/layers.ts b/src/layers.ts index eaa2f86..55c62b4 100644 --- a/src/layers.ts +++ b/src/layers.ts @@ -23,6 +23,7 @@ interface ModuleInterface { train(): void; eval(): void; entries(): [string, Module | Parameter | Tensor | any][]; + mode: "train" | "eval"; } // Module class: From ad55809b57196bc9f2111ccdc65c91f9bc5cff9b Mon Sep 17 00:00:00 2001 From: pparke Date: Tue, 2 Apr 2024 19:43:12 -0300 Subject: [PATCH 4/9] add class fields to FullyConnected --- src/layers.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/layers.ts b/src/layers.ts index 55c62b4..a7b40a3 100644 --- a/src/layers.ts +++ b/src/layers.ts @@ -215,6 +215,11 @@ export class MultiHeadSelfAttention extends Module { } export class FullyConnected extends Module { + public l1: Linear; + public relu: ReLU; + public l2: Linear; + public dropout: Dropout; + /** * Small block composed of two Linear layers, a ReLU non-linearity and a Dropout layer. * From d3c356378fa2a169eb5ff34df4610346595203b9 Mon Sep 17 00:00:00 2001 From: pparke Date: Tue, 2 Apr 2024 19:43:48 -0300 Subject: [PATCH 5/9] add class fields to Block --- src/layers.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/layers.ts b/src/layers.ts index a7b40a3..4610080 100644 --- a/src/layers.ts +++ b/src/layers.ts @@ -251,6 +251,11 @@ export class FullyConnected extends Module { } export class Block extends Module { + public att: MultiHeadSelfAttention; + public ln1: LayerNorm; + public fcc: FullyConnected; + public ln2: LayerNorm; + /** * Full transformer decoder block. Composed of Multi Head Self Attention, Fully connected layers and Layer Norms. 
* From 882e2997b95a817cfe10492ff048612e03ba141a Mon Sep 17 00:00:00 2001 From: pparke Date: Tue, 2 Apr 2024 19:46:02 -0300 Subject: [PATCH 6/9] move jsdoc commens above class so they appear on hover --- src/layers.ts | 67 ++++++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/src/layers.ts b/src/layers.ts index 4610080..0867925 100644 --- a/src/layers.ts +++ b/src/layers.ts @@ -89,19 +89,20 @@ export class Module implements ModuleInterface { } // Standard Layers: + +/** + * Simple linear layer, with weight matrix and optional bias. Does not contain nonlinearity. + * + * @param {number} in_size - size of the last dimention of the input array. + * @param {number} out_size - size of the last dimention of the output array. + * @param {boolean} bias - wether to include a bias term. + * @param {boolean} xavier - Wether to use xavier initialization (divide by square root of first input dimension). + */ export class Linear extends Module { public W: Tensor; public b: Tensor; public has_bias: boolean; - /** - * Simple linear layer, with weight matrix and optional bias. Does not contain nonlinearity. - * - * @param {number} in_size - size of the last dimention of the input array. - * @param {number} out_size - size of the last dimention of the output array. - * @param {boolean} bias - wether to include a bias term. - * @param {boolean} xavier - Wether to use xavier initialization (divide by square root of first input dimension). - */ constructor(in_size: number, out_size: number, bias = true, xavier = true) { super(); this.W = randn([in_size, out_size], true, xavier); @@ -123,6 +124,15 @@ export class Linear extends Module { } } +/** + * Full transformer Layer implementation. + * + * @param {number} in_size - size of the last dimention of the input array. + * @param {number} out_size - size of the last dimention of the output array. + * @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size). + * @param {number} n_timesteps - length of text sequence to be processed bt Transformer. + * @param {number} dropout_prob - probability of zeroing each activation in dropout Layer. + */ export class MultiHeadSelfAttention extends Module { public Wk: Linear; public Wq: Linear; @@ -134,15 +144,6 @@ export class MultiHeadSelfAttention extends Module { public softmax: Softmax; public H: number; - /** - * Full transformer Layer implementation. - * - * @param {number} in_size - size of the last dimention of the input array. - * @param {number} out_size - size of the last dimention of the output array. - * @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size). - * @param {number} n_timesteps - length of text sequence to be processed bt Transformer. - * @param {number} dropout_prob - probability of zeroing each activation in dropout Layer. - */ constructor( in_size: number, out_size: number, @@ -214,19 +215,19 @@ export class MultiHeadSelfAttention extends Module { } } +/** + * Small block composed of two Linear layers, a ReLU non-linearity and a Dropout layer. + * + * @param {number} in_size - size of the last dimention of the input array. + * @param {number} out_size - size of the last dimention of the output array. + * @param {number} dropout_prob - probability of zeroing each activation in dropout Layer. 
+ */ export class FullyConnected extends Module { public l1: Linear; public relu: ReLU; public l2: Linear; public dropout: Dropout; - /** - * Small block composed of two Linear layers, a ReLU non-linearity and a Dropout layer. - * - * @param {number} in_size - size of the last dimention of the input array. - * @param {number} out_size - size of the last dimention of the output array. - * @param {number} dropout_prob - probability of zeroing each activation in dropout Layer. - */ constructor(in_size: number, out_size: number, dropout_prob = 0) { super(); @@ -250,21 +251,21 @@ export class FullyConnected extends Module { } } +/** + * Full transformer decoder block. Composed of Multi Head Self Attention, Fully connected layers and Layer Norms. + * + * @param {number} in_size - size of the last dimention of the input array. + * @param {number} out_size - size of the last dimention of the output array. + * @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size). + * @param {number} n_timesteps - length of text sequence to be processed bt Transformer. + * @param {number} dropout_prob - probability of zeroing each activation in dropout Layer. + */ export class Block extends Module { public att: MultiHeadSelfAttention; public ln1: LayerNorm; public fcc: FullyConnected; public ln2: LayerNorm; - /** - * Full transformer decoder block. Composed of Multi Head Self Attention, Fully connected layers and Layer Norms. - * - * @param {number} in_size - size of the last dimention of the input array. - * @param {number} out_size - size of the last dimention of the output array. - * @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size). - * @param {number} n_timesteps - length of text sequence to be processed bt Transformer. - * @param {number} dropout_prob - probability of zeroing each activation in dropout Layer. - */ constructor( in_size: number, out_size: number, From f823bd6ab39ea4f0c14f3187990641a063dbfe54 Mon Sep 17 00:00:00 2001 From: pparke Date: Tue, 2 Apr 2024 19:49:19 -0300 Subject: [PATCH 7/9] prettier formatting and made index2 in Tensor.at() optional according to the comment and usage in the lib --- src/tensor.ts | 154 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 109 insertions(+), 45 deletions(-) diff --git a/src/tensor.ts b/src/tensor.ts index 4868e26..c6fa690 100644 --- a/src/tensor.ts +++ b/src/tensor.ts @@ -25,7 +25,7 @@ export class Tensor { } else if (typeof data === "number") { this._data = [data]; } else { - throw Error('Your argument "data" is not a number or an iterable.') + throw Error('Your argument "data" is not a number or an iterable.'); } this.shape = getShape(data); this.requires_grad = requires_grad; @@ -34,7 +34,7 @@ export class Tensor { if (this.requires_grad) { this._grad = zeros(this.shape); } - + // Graph connections: this.children = []; this.parents = []; @@ -194,7 +194,9 @@ export class Tensor { return this.add(-other); } else if (other instanceof Tensor) { return this.add(other.neg()); - } else { throw Error('Argument "other" is not a Tensor or a number.') } + } else { + throw Error('Argument "other" is not a Tensor or a number.'); + } } /** @@ -218,8 +220,8 @@ export class Tensor { /** * Divide this Tensor by integer or other Tensor. - * @param {any} other - Tensor or integer to divide this Tensor by. - * @returns {object} New tensor. + * @param {Tensor | number} other - Tensor or integer to divide this Tensor by. + * @returns {Tensor} New tensor. 
*/ div(other: Tensor | number): Tensor { const operation = new Div(); @@ -229,7 +231,7 @@ export class Tensor { /** * Multiply this Tensor by integer or other Tensor. * @param {Tensor | number} other - Tensor or integer to multiply this Tensor by. - * @returns {object} New tensor. + * @returns {Tensor} New tensor. */ matmul(other: Tensor): Tensor { const operation = new MatMul(); @@ -301,7 +303,7 @@ export class Tensor { * [1,1,2,3]]) * a.at([0,1,0]) */ - at(index1: Tensor | Array, index2: Tensor | Array): Tensor { + at(index1: Tensor | Array, index2?: Tensor | Array): Tensor { const operation = new At(); return operation.forward(this, index1, index2); } @@ -320,9 +322,11 @@ export class Tensor { * // [0,0,2,0]]) * a.masked_fill(mask, (el) => {return el > 3}, 0) */ - masked_fill(mask: Tensor, - condition: (someArg: number) => boolean, - value: number) { + masked_fill( + mask: Tensor, + condition: (someArg: number) => boolean, + value: number + ) { const operation = new MaskedFill(); return operation.forward(this, mask, condition, value); } @@ -594,16 +598,16 @@ export class MatMul { requiresGrad(a) || requiresGrad(b) // requires_grad; ); - // Connect nodes in graph: - if (a instanceof Tensor && requiresGrad(a)) { - z.parents.push(a); - a.children.push(z); - } - if (b instanceof Tensor && requiresGrad(b)) { - z.parents.push(b); - b.children.push(z); - } - z.operation = this; + // Connect nodes in graph: + if (a instanceof Tensor && requiresGrad(a)) { + z.parents.push(a); + a.children.push(z); + } + if (b instanceof Tensor && requiresGrad(b)) { + z.parents.push(b); + b.children.push(z); + } + z.operation = this; return z; } @@ -1039,7 +1043,11 @@ export class Transpose { export class At { cache: any; - forward(a: Tensor, idx1: Tensor | Array, idx2: Tensor | Array | null = null): Tensor { + forward( + a: Tensor, + idx1: Tensor | Array, + idx2: Tensor | Array | null = null + ): Tensor { // Make sure index lists are flat JavaScript arrays: if (idx1) { idx1 = assureArray(idx1).flat(Infinity); @@ -1094,7 +1102,12 @@ export class At { export class MaskedFill { cache: any; - forward(a: Tensor, mask: Tensor, condition: (someArg: number) => boolean, value: number): Tensor { + forward( + a: Tensor, + mask: Tensor, + condition: (someArg: number) => boolean, + value: number + ): Tensor { // Build cache to use in backward step: this.cache = [a, mask, condition]; @@ -1321,7 +1334,11 @@ export function transpose(a: Tensor, dim1: number, dim2: number): Tensor { * // Returns tensor([2,6,8]): * a.at([0,1,1], [2,0,2]) */ -export function at(a: Tensor, idx1: Tensor | Array, idx2: Tensor | Array): Tensor { +export function at( + a: Tensor, + idx1: Tensor | Array, + idx2: Tensor | Array +): Tensor { return a.at(idx1, idx2); } @@ -1340,7 +1357,12 @@ export function at(a: Tensor, idx1: Tensor | Array, idx2: Tensor | Array {return el > 3}, 0) */ -export function masked_fill(a: Tensor, mask: Tensor, condition: (someArg: number) => boolean, value: number): Tensor { +export function masked_fill( + a: Tensor, + mask: Tensor, + condition: (someArg: number) => boolean, + value: number +): Tensor { return a.masked_fill(mask, condition, value); } @@ -1428,9 +1450,9 @@ function _add(a: Array | number, b: Array | number): any { // If both are numbers, return number. If one is a Tensor, add number to each element in tensor. 
if (typeof a === "number" && typeof b === "number") { return a + b; - } else if (typeof a === 'number' && b instanceof Array) { + } else if (typeof a === "number" && b instanceof Array) { return b.map((element) => _add(element, a)); - } else if (a instanceof Array && typeof b === 'number') { + } else if (a instanceof Array && typeof b === "number") { return a.map((element) => _add(element, b)); } else if (a instanceof Array && b instanceof Array) { // If both are tensors, we need to broadcast: @@ -1477,11 +1499,15 @@ function _add(a: Array | number, b: Array | number): any { } else { return b.map((element) => _add(a, element)); } - } else {throw Error('Given arguments cannot be added.')} - } else {throw Error('Given arguments cannot be added.')} + } else { + throw Error("Given arguments cannot be added."); + } + } else { + throw Error("Given arguments cannot be added."); + } } -function _neg(a: Array | number): Array | number{ +function _neg(a: Array | number): Array | number { // If a is a number, make it negative. If not, make all of its elements negative: if (typeof a === "number") { return -a; @@ -1613,8 +1639,10 @@ function _matmul(a: Array, b: Array): Array { } // If this dimension has equal lengths, keep searching: if (typeof a[0][0] === "object") { - return a.map((element: Array, idx: number) => _matmul(element, b[idx])); - // If not, try to matmul: + return a.map((element: Array, idx: number) => + _matmul(element, b[idx]) + ); + // If not, try to matmul: } else { // If dimensions align, perform matmul: if (a[0].length === b.length && typeof a[0][0] === "number") { @@ -1664,7 +1692,7 @@ function _sqrt(a: Array | number): Array | number { } } -function _exp(a:Array | number): Array | number { +function _exp(a: Array | number): Array | number { // If a is a number, exponentiate it. If not, exponentiate all of its elements: if (typeof a === "number") { return 2.718281828459045 ** a; @@ -1707,7 +1735,11 @@ function _transpose(a: Array, dim: number): Array { } } -function _at(a: Array, idx1: Array, idx2: Array | null): Array { +function _at( + a: Array, + idx1: Array, + idx2: Array | null +): Array { // If there is a second index, fill a new array in position "N" with a[idx1[N]][idx2[N]] (2 Dims): if (idx2) { return Array(idx1.length) @@ -1721,7 +1753,12 @@ function _at(a: Array, idx1: Array, idx2: Array | null): Array | number, mask: Array | number, condition: (someArg: number) => boolean, value: number): Array | number { +function _masked_fill( + a: Array | number, + mask: Array | number, + condition: (someArg: number) => boolean, + value: number +): Array | number { // If a is a number, test "condition" on it. If not, recursive step to all of its elements: if (typeof mask === "number") { if (typeof a != "number") { @@ -1767,7 +1804,10 @@ export function _reshape(a: Array, shape: Array): Array { * @param {function} valueFunc - Function that returns number to fill up the Tensor. * @returns {object} New tensor. */ -function _tensorInitializer(shape: Array, valueFunc: () => number): Array { +function _tensorInitializer( + shape: Array, + valueFunc: () => number +): Array { if (shape.length === 1) { const emptyArray = Array(shape[0]).fill(0); return emptyArray.map(() => valueFunc()); @@ -1853,7 +1893,11 @@ export function rand(shape: Array, requires_grad = false): Tensor { * @param {boolean} xavier - Whether to use xavier initialization (divide by square root of first input dimension). * @returns {object} New tensor. 
*/ -export function randn(shape: Array, requires_grad = false, xavier = false): Tensor { +export function randn( + shape: Array, + requires_grad = false, + xavier = false +): Tensor { return new Tensor( _tensorInitializer(shape, () => { const mean = Math.random() + 0.00001; @@ -1879,12 +1923,17 @@ export function randn(shape: Array, requires_grad = false, xavier = fals * @param {boolean} requires_grad - Whether to keep track of this tensor's gradients. * @returns {object} New tensor. */ -export function randint(low = 0, high = 1, shape = [1], requires_grad = false): Tensor { +export function randint( + low = 0, + high = 1, + shape = [1], + requires_grad = false +): Tensor { return new Tensor( _tensorInitializer(shape, () => { return Math.floor(Math.random() * (high - low)) + low; }), - (requires_grad) + requires_grad ); } @@ -1917,7 +1966,10 @@ export function requiresGrad(a: Tensor | number | Array): boolean { * broadcast(ones([5,3,2]), ones([4,5,3,1])); */ export function broadcast(a: Tensor, b: Tensor): Tensor { - function _broadcast(out: Array | number, b: Array | number): Array | number { + function _broadcast( + out: Array | number, + b: Array | number + ): Array | number { if (typeof out === "number" && typeof b === "number") { return out; } else if (typeof out === "number" && b instanceof Array) { @@ -1928,7 +1980,6 @@ export function broadcast(a: Tensor, b: Tensor): Tensor { } else if (JSON.stringify(getShape(out)) === JSON.stringify(getShape(b))) { return out; } else if (out instanceof Array && b instanceof Array) { - // If both are tensors, we need to broadcast: const outShape = getShape(out); const bShape = getShape(b); @@ -1975,7 +2026,10 @@ export function broadcast(a: Tensor, b: Tensor): Tensor { } } else { // Define recursive function to find dimension with length 1: - const _broadcastSideways = (out: Array | number | null, b: Array): Array => { + const _broadcastSideways = ( + out: Array | number | null, + b: Array + ): Array => { if (out instanceof Array && b.length != out.length) { if (b.length === 1) { // Base case, contract existing dimension: @@ -2001,13 +2055,17 @@ export function broadcast(a: Tensor, b: Tensor): Tensor { return [null].map((element, idx) => _broadcastSideways(element, b[idx]) ); - } else {throw Error('Shapes not broadcastable.')} + } else { + throw Error("Shapes not broadcastable."); + } } }; // Return final broadcast tensor: return _broadcastSideways(out, b); } - } else { throw Error ("Shapes not broadcastable.")} + } else { + throw Error("Shapes not broadcastable."); + } } let out = a.data; @@ -2026,8 +2084,14 @@ export function broadcast(a: Tensor, b: Tensor): Tensor { * // Returns tensor with shape [4,2,3]: * broadcastUp(ones([2,3]), ones([4,3,2])); */ -export function broadcastUp(inElement: Array, outElement: Array): Array { - function _broadcastUp(inElement: Array, outElement: Array): Array { +export function broadcastUp( + inElement: Array, + outElement: Array +): Array { + function _broadcastUp( + inElement: Array, + outElement: Array + ): Array { if (getShape(inElement).length + 1 === getShape(outElement).length) { // Base case, create new dimension: const emptyArray = Array(outElement.length).fill(zeros); From 2f7255eae159aaa7161414d85355076193a5a584 Mon Sep 17 00:00:00 2001 From: pparke Date: Tue, 2 Apr 2024 19:52:36 -0300 Subject: [PATCH 8/9] added remaining class fields and moved comment blocks --- src/layers.ts | 75 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 31 deletions(-) diff --git 
a/src/layers.ts b/src/layers.ts index 0867925..4bf4f2e 100644 --- a/src/layers.ts +++ b/src/layers.ts @@ -301,13 +301,16 @@ export class Block extends Module { } // Embedding Layers + +/** + * Embedding class, turns indexes into vectors. + * + * @param {number} in_size - number of different indexes (vocabulary size). + * @param {number} out_size - size of the embedding vector generated. + */ export class Embedding extends Module { - /** - * Embedding class, turns indexes into vectors. - * - * @param {number} in_size - number of different indexes (vocabulary size). - * @param {number} out_size - size of the embedding vector generated. - */ + public E: Tensor; + constructor(in_size: number, embed_size: number) { super(); this.E = randn([in_size, embed_size], true, false); @@ -331,13 +334,15 @@ export class Embedding extends Module { } } +/** + * Embedding class, turns indexes into vectors. + * + * @param {number} n_timesteps - number of different embeddings (number of timesteps in each instance in batch). + * @param {number} embed_size - size of the embedding vector generated. + */ export class PositionalEmbedding extends Module { - /** - * Embedding class, turns indexes into vectors. - * - * @param {number} n_timesteps - number of different embeddings (number of timesteps in each instance in batch). - * @param {number} embed_size - size of the embedding vector generated. - */ + public E: Tensor; + constructor(n_timesteps: number, embed_size: number) { super(); this.E = randn([n_timesteps, embed_size], true, false); @@ -359,10 +364,11 @@ export class PositionalEmbedding extends Module { } // Non-linearity Layers: + +/** + * Rectified Linear Unit nonlinearity. Returns z if z>0 else 0. + */ export class ReLU extends Module { - /** - * Rectified Linear Unit nonlinearity. Returns z if z>0 else 0. - */ constructor() { super(); } @@ -396,10 +402,10 @@ export class ReLU extends Module { } } +/** + * Softmax nonlinearity class. Returns distribution of values (sum=1). + */ export class Softmax extends Module { - /** - * Softmax nonlinearity class. Returns distribution of values (sum=1). - */ constructor() { super(); } @@ -418,12 +424,15 @@ export class Softmax extends Module { } // Regularization Layers: + +/** + * Dropout class, added usually after other layers, to drop values to zero with given probability + * + * @param {number} drop_prob - probability to drop each value in input. + */ export class Dropout extends Module { - /** - * Dropout class, added usually after other layers, to drop values to zero with given probability - * - * @param {number} drop_prob - probability to drop each value in input. - */ + public p: number; + constructor(drop_prob: number) { super(); this.p = drop_prob; @@ -453,12 +462,15 @@ export class Dropout extends Module { } } +/** + * Layer Norm class, added usually after other layers to normalize across all of the output. + * + * @param {number} n_embed - size of the last dimention of the input. + */ export class LayerNorm extends Module { - /** - * Layer Norm class, added usually after other layers to normalize across all of the output. - * - * @param {number} n_embed - size of the last dimention of the input. - */ + public gamma: Tensor; + public beta: Tensor; + constructor(n_embed: number) { super(); this.gamma = ones([n_embed], true); @@ -474,10 +486,11 @@ export class LayerNorm extends Module { } // Loss layers: + +/** + * Cross Entropy Loss class, returns the loss given the output and the expected indexes. 
+ */ export class CrossEntropyLoss extends Module { - /** - * Cross Entropy Loss class, returns the loss given the output and the expected indexes. - */ constructor() { super(); } From 228e8e81fa1c946db030745e7366d49564d45559 Mon Sep 17 00:00:00 2001 From: pparke Date: Tue, 2 Apr 2024 19:59:10 -0300 Subject: [PATCH 9/9] move Adam class comment so it will show up on hover --- src/optim.ts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/optim.ts b/src/optim.ts index da60971..9687d5a 100644 --- a/src/optim.ts +++ b/src/optim.ts @@ -1,5 +1,13 @@ import { Parameter, Tensor, zeros } from "./tensor"; +/** + * Adam optimizer class. + * @param {(Parameter | Tensor)[]} params - List of all Parameter or Tensor (with requires_grad = True) to be optimized by Adam. "params" is usually set to nn.Module.parameters(), which automatically returns all parameters in a list form. + * @param {number} lr - Scalar multiplying each learning step, controls speed of learning. + * @param {number} reg - Scalar controling strength l2 regularization. + * @param {(number)[]} betas - Two scalar floats controling how slowly the optimizer changes the "m" and "v" attributes. + * @param {number} eps - Scalar added to denominator to stop it from ever going to zero. + */ export class Adam { // Declare Adam's types: params: (Parameter | Tensor)[]; @@ -9,14 +17,6 @@ export class Adam { b2: number; eps: number; - /** - * Adam optimizer class. - * @param {(Parameter | Tensor)[]} params - List of all Parameter or Tensor (with requires_grad = True) to be optimized by Adam. "params" is usually set to nn.Module.parameters(), which automatically returns all parameters in a list form. - * @param {number} lr - Scalar multiplying each learning step, controls speed of learning. - * @param {number} reg - Scalar controling strength l2 regularization. - * @param {(number)[]} betas - Two scalar floats controling how slowly the optimizer changes the "m" and "v" attributes. - * @param {number} eps - Scalar added to denominator to stop it from ever going to zero. - */ constructor( params: (Parameter | Tensor)[], lr = 1e-3,
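Taken together, the series declares explicit class fields on the layer classes, moves each JSDoc block above its class declaration so editors surface it on hover, and makes the second index argument of Tensor.at() optional. Declaring the fields up front gives them static types that the compiler and editor can check, instead of properties that only come into existence inside the constructor. Below is a minimal usage sketch of what this enables, assuming the constructors and the randn/at signatures shown in the hunks above are exported from src/layers.ts and src/tensor.ts as the diff paths suggest; the sizes and variable names are illustrative only, not taken from the patches.

import { MultiHeadSelfAttention, Linear } from "./layers";
import { Tensor, randn } from "./tensor";

// Class fields are now declared, so they are typed members rather than
// properties attached implicitly in the constructor:
const attn = new MultiHeadSelfAttention(64, 64, 4, 16, 0.2);
const proj: Linear = attn.residual_proj; // typed as Linear, not any

// JSDoc now sits above each class, so hovering MultiHeadSelfAttention
// (or Linear, Block, Dropout, ...) in an editor shows the @param docs.

// Tensor.at() accepts a single index list, matching its own doc example:
const a: Tensor = randn([3, 4], true, false);
const rows = a.at([0, 1, 0]);             // index2 is now optional
const elems = a.at([0, 1, 1], [2, 0, 2]); // two index lists still work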