From 9bd26aa664cea607dad30ead1de76939e596cd7d Mon Sep 17 00:00:00 2001 From: pparke Date: Tue, 2 Apr 2024 19:40:16 -0300 Subject: [PATCH 1/9] add class fields to MultiHeadSelfAttention --- src/layers.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/layers.ts b/src/layers.ts index 94bff11..68e844a 100644 --- a/src/layers.ts +++ b/src/layers.ts @@ -119,6 +119,16 @@ export class Linear extends Module { } export class MultiHeadSelfAttention extends Module { + public Wk: Linear; + public Wq: Linear; + public Wv: Linear; + public residual_proj: Linear; + public mask: Tensor; + public att_dropout: Dropout; + public residual_dropout: Dropout; + public softmax: Softmax; + public H: number; + /** * Full transformer Layer implementation. * From 5692e7ffbc50b49de2bbb3ebf1d7ed6e663289c6 Mon Sep 17 00:00:00 2001 From: pparke Date: Tue, 2 Apr 2024 19:41:12 -0300 Subject: [PATCH 2/9] add class fields to Linear --- src/layers.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/layers.ts b/src/layers.ts index 68e844a..eaa2f86 100644 --- a/src/layers.ts +++ b/src/layers.ts @@ -89,6 +89,10 @@ export class Module implements ModuleInterface { // Standard Layers: export class Linear extends Module { + public W: Tensor; + public b: Tensor; + public has_bias: boolean; + /** * Simple linear layer, with weight matrix and optional bias. Does not contain nonlinearity. * From bbe12a4a654284bab15a36fdb4b34eb836aa2885 Mon Sep 17 00:00:00 2001 From: pparke Date: Tue, 2 Apr 2024 19:42:29 -0300 Subject: [PATCH 3/9] add mode to ModuleInterface --- src/layers.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/layers.ts b/src/layers.ts index eaa2f86..55c62b4 100644 --- a/src/layers.ts +++ b/src/layers.ts @@ -23,6 +23,7 @@ interface ModuleInterface { train(): void; eval(): void; entries(): [string, Module | Parameter | Tensor | any][]; + mode: "train" | "eval"; } // Module class: From ad55809b57196bc9f2111ccdc65c91f9bc5cff9b Mon Sep 17 00:00:00 2001 From: pparke Date: Tue, 2 Apr 2024 19:43:12 -0300 Subject: [PATCH 4/9] add class fields to FullyConnected --- src/layers.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/layers.ts b/src/layers.ts index 55c62b4..a7b40a3 100644 --- a/src/layers.ts +++ b/src/layers.ts @@ -215,6 +215,11 @@ export class MultiHeadSelfAttention extends Module { } export class FullyConnected extends Module { + public l1: Linear; + public relu: ReLU; + public l2: Linear; + public dropout: Dropout; + /** * Small block composed of two Linear layers, a ReLU non-linearity and a Dropout layer. * From d3c356378fa2a169eb5ff34df4610346595203b9 Mon Sep 17 00:00:00 2001 From: pparke Date: Tue, 2 Apr 2024 19:43:48 -0300 Subject: [PATCH 5/9] add class fields to Block --- src/layers.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/layers.ts b/src/layers.ts index a7b40a3..4610080 100644 --- a/src/layers.ts +++ b/src/layers.ts @@ -251,6 +251,11 @@ export class FullyConnected extends Module { } export class Block extends Module { + public att: MultiHeadSelfAttention; + public ln1: LayerNorm; + public fcc: FullyConnected; + public ln2: LayerNorm; + /** * Full transformer decoder block. Composed of Multi Head Self Attention, Fully connected layers and Layer Norms. 
* From 882e2997b95a817cfe10492ff048612e03ba141a Mon Sep 17 00:00:00 2001 From: pparke Date: Tue, 2 Apr 2024 19:46:02 -0300 Subject: [PATCH 6/9] move jsdoc commens above class so they appear on hover --- src/layers.ts | 67 ++++++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/src/layers.ts b/src/layers.ts index 4610080..0867925 100644 --- a/src/layers.ts +++ b/src/layers.ts @@ -89,19 +89,20 @@ export class Module implements ModuleInterface { } // Standard Layers: + +/** + * Simple linear layer, with weight matrix and optional bias. Does not contain nonlinearity. + * + * @param {number} in_size - size of the last dimention of the input array. + * @param {number} out_size - size of the last dimention of the output array. + * @param {boolean} bias - wether to include a bias term. + * @param {boolean} xavier - Wether to use xavier initialization (divide by square root of first input dimension). + */ export class Linear extends Module { public W: Tensor; public b: Tensor; public has_bias: boolean; - /** - * Simple linear layer, with weight matrix and optional bias. Does not contain nonlinearity. - * - * @param {number} in_size - size of the last dimention of the input array. - * @param {number} out_size - size of the last dimention of the output array. - * @param {boolean} bias - wether to include a bias term. - * @param {boolean} xavier - Wether to use xavier initialization (divide by square root of first input dimension). - */ constructor(in_size: number, out_size: number, bias = true, xavier = true) { super(); this.W = randn([in_size, out_size], true, xavier); @@ -123,6 +124,15 @@ export class Linear extends Module { } } +/** + * Full transformer Layer implementation. + * + * @param {number} in_size - size of the last dimention of the input array. + * @param {number} out_size - size of the last dimention of the output array. + * @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size). + * @param {number} n_timesteps - length of text sequence to be processed bt Transformer. + * @param {number} dropout_prob - probability of zeroing each activation in dropout Layer. + */ export class MultiHeadSelfAttention extends Module { public Wk: Linear; public Wq: Linear; @@ -134,15 +144,6 @@ export class MultiHeadSelfAttention extends Module { public softmax: Softmax; public H: number; - /** - * Full transformer Layer implementation. - * - * @param {number} in_size - size of the last dimention of the input array. - * @param {number} out_size - size of the last dimention of the output array. - * @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size). - * @param {number} n_timesteps - length of text sequence to be processed bt Transformer. - * @param {number} dropout_prob - probability of zeroing each activation in dropout Layer. - */ constructor( in_size: number, out_size: number, @@ -214,19 +215,19 @@ export class MultiHeadSelfAttention extends Module { } } +/** + * Small block composed of two Linear layers, a ReLU non-linearity and a Dropout layer. + * + * @param {number} in_size - size of the last dimention of the input array. + * @param {number} out_size - size of the last dimention of the output array. + * @param {number} dropout_prob - probability of zeroing each activation in dropout Layer. 
+ */ export class FullyConnected extends Module { public l1: Linear; public relu: ReLU; public l2: Linear; public dropout: Dropout; - /** - * Small block composed of two Linear layers, a ReLU non-linearity and a Dropout layer. - * - * @param {number} in_size - size of the last dimention of the input array. - * @param {number} out_size - size of the last dimention of the output array. - * @param {number} dropout_prob - probability of zeroing each activation in dropout Layer. - */ constructor(in_size: number, out_size: number, dropout_prob = 0) { super(); @@ -250,21 +251,21 @@ export class FullyConnected extends Module { } } +/** + * Full transformer decoder block. Composed of Multi Head Self Attention, Fully connected layers and Layer Norms. + * + * @param {number} in_size - size of the last dimention of the input array. + * @param {number} out_size - size of the last dimention of the output array. + * @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size). + * @param {number} n_timesteps - length of text sequence to be processed bt Transformer. + * @param {number} dropout_prob - probability of zeroing each activation in dropout Layer. + */ export class Block extends Module { public att: MultiHeadSelfAttention; public ln1: LayerNorm; public fcc: FullyConnected; public ln2: LayerNorm; - /** - * Full transformer decoder block. Composed of Multi Head Self Attention, Fully connected layers and Layer Norms. - * - * @param {number} in_size - size of the last dimention of the input array. - * @param {number} out_size - size of the last dimention of the output array. - * @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size). - * @param {number} n_timesteps - length of text sequence to be processed bt Transformer. - * @param {number} dropout_prob - probability of zeroing each activation in dropout Layer. - */ constructor( in_size: number, out_size: number, From f823bd6ab39ea4f0c14f3187990641a063dbfe54 Mon Sep 17 00:00:00 2001 From: pparke Date: Tue, 2 Apr 2024 19:49:19 -0300 Subject: [PATCH 7/9] prettier formatting and made index2 in Tensor.at() optional according to the comment and usage in the lib --- src/tensor.ts | 154 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 109 insertions(+), 45 deletions(-) diff --git a/src/tensor.ts b/src/tensor.ts index 4868e26..c6fa690 100644 --- a/src/tensor.ts +++ b/src/tensor.ts @@ -25,7 +25,7 @@ export class Tensor { } else if (typeof data === "number") { this._data = [data]; } else { - throw Error('Your argument "data" is not a number or an iterable.') + throw Error('Your argument "data" is not a number or an iterable.'); } this.shape = getShape(data); this.requires_grad = requires_grad; @@ -34,7 +34,7 @@ export class Tensor { if (this.requires_grad) { this._grad = zeros(this.shape); } - + // Graph connections: this.children = []; this.parents = []; @@ -194,7 +194,9 @@ export class Tensor { return this.add(-other); } else if (other instanceof Tensor) { return this.add(other.neg()); - } else { throw Error('Argument "other" is not a Tensor or a number.') } + } else { + throw Error('Argument "other" is not a Tensor or a number.'); + } } /** @@ -218,8 +220,8 @@ export class Tensor { /** * Divide this Tensor by integer or other Tensor. - * @param {any} other - Tensor or integer to divide this Tensor by. - * @returns {object} New tensor. + * @param {Tensor | number} other - Tensor or integer to divide this Tensor by. + * @returns {Tensor} New tensor. 
*/ div(other: Tensor | number): Tensor { const operation = new Div(); @@ -229,7 +231,7 @@ export class Tensor { /** * Multiply this Tensor by integer or other Tensor. * @param {Tensor | number} other - Tensor or integer to multiply this Tensor by. - * @returns {object} New tensor. + * @returns {Tensor} New tensor. */ matmul(other: Tensor): Tensor { const operation = new MatMul(); @@ -301,7 +303,7 @@ export class Tensor { * [1,1,2,3]]) * a.at([0,1,0]) */ - at(index1: Tensor | Array, index2: Tensor | Array): Tensor { + at(index1: Tensor | Array, index2?: Tensor | Array): Tensor { const operation = new At(); return operation.forward(this, index1, index2); } @@ -320,9 +322,11 @@ export class Tensor { * // [0,0,2,0]]) * a.masked_fill(mask, (el) => {return el > 3}, 0) */ - masked_fill(mask: Tensor, - condition: (someArg: number) => boolean, - value: number) { + masked_fill( + mask: Tensor, + condition: (someArg: number) => boolean, + value: number + ) { const operation = new MaskedFill(); return operation.forward(this, mask, condition, value); } @@ -594,16 +598,16 @@ export class MatMul { requiresGrad(a) || requiresGrad(b) // requires_grad; ); - // Connect nodes in graph: - if (a instanceof Tensor && requiresGrad(a)) { - z.parents.push(a); - a.children.push(z); - } - if (b instanceof Tensor && requiresGrad(b)) { - z.parents.push(b); - b.children.push(z); - } - z.operation = this; + // Connect nodes in graph: + if (a instanceof Tensor && requiresGrad(a)) { + z.parents.push(a); + a.children.push(z); + } + if (b instanceof Tensor && requiresGrad(b)) { + z.parents.push(b); + b.children.push(z); + } + z.operation = this; return z; } @@ -1039,7 +1043,11 @@ export class Transpose { export class At { cache: any; - forward(a: Tensor, idx1: Tensor | Array, idx2: Tensor | Array | null = null): Tensor { + forward( + a: Tensor, + idx1: Tensor | Array, + idx2: Tensor | Array | null = null + ): Tensor { // Make sure index lists are flat JavaScript arrays: if (idx1) { idx1 = assureArray(idx1).flat(Infinity); @@ -1094,7 +1102,12 @@ export class At { export class MaskedFill { cache: any; - forward(a: Tensor, mask: Tensor, condition: (someArg: number) => boolean, value: number): Tensor { + forward( + a: Tensor, + mask: Tensor, + condition: (someArg: number) => boolean, + value: number + ): Tensor { // Build cache to use in backward step: this.cache = [a, mask, condition]; @@ -1321,7 +1334,11 @@ export function transpose(a: Tensor, dim1: number, dim2: number): Tensor { * // Returns tensor([2,6,8]): * a.at([0,1,1], [2,0,2]) */ -export function at(a: Tensor, idx1: Tensor | Array, idx2: Tensor | Array): Tensor { +export function at( + a: Tensor, + idx1: Tensor | Array, + idx2: Tensor | Array +): Tensor { return a.at(idx1, idx2); } @@ -1340,7 +1357,12 @@ export function at(a: Tensor, idx1: Tensor | Array, idx2: Tensor | Array {return el > 3}, 0) */ -export function masked_fill(a: Tensor, mask: Tensor, condition: (someArg: number) => boolean, value: number): Tensor { +export function masked_fill( + a: Tensor, + mask: Tensor, + condition: (someArg: number) => boolean, + value: number +): Tensor { return a.masked_fill(mask, condition, value); } @@ -1428,9 +1450,9 @@ function _add(a: Array | number, b: Array | number): any { // If both are numbers, return number. If one is a Tensor, add number to each element in tensor. 
if (typeof a === "number" && typeof b === "number") { return a + b; - } else if (typeof a === 'number' && b instanceof Array) { + } else if (typeof a === "number" && b instanceof Array) { return b.map((element) => _add(element, a)); - } else if (a instanceof Array && typeof b === 'number') { + } else if (a instanceof Array && typeof b === "number") { return a.map((element) => _add(element, b)); } else if (a instanceof Array && b instanceof Array) { // If both are tensors, we need to broadcast: @@ -1477,11 +1499,15 @@ function _add(a: Array | number, b: Array | number): any { } else { return b.map((element) => _add(a, element)); } - } else {throw Error('Given arguments cannot be added.')} - } else {throw Error('Given arguments cannot be added.')} + } else { + throw Error("Given arguments cannot be added."); + } + } else { + throw Error("Given arguments cannot be added."); + } } -function _neg(a: Array | number): Array | number{ +function _neg(a: Array | number): Array | number { // If a is a number, make it negative. If not, make all of its elements negative: if (typeof a === "number") { return -a; @@ -1613,8 +1639,10 @@ function _matmul(a: Array, b: Array): Array { } // If this dimension has equal lengths, keep searching: if (typeof a[0][0] === "object") { - return a.map((element: Array, idx: number) => _matmul(element, b[idx])); - // If not, try to matmul: + return a.map((element: Array, idx: number) => + _matmul(element, b[idx]) + ); + // If not, try to matmul: } else { // If dimensions align, perform matmul: if (a[0].length === b.length && typeof a[0][0] === "number") { @@ -1664,7 +1692,7 @@ function _sqrt(a: Array | number): Array | number { } } -function _exp(a:Array | number): Array | number { +function _exp(a: Array | number): Array | number { // If a is a number, exponentiate it. If not, exponentiate all of its elements: if (typeof a === "number") { return 2.718281828459045 ** a; @@ -1707,7 +1735,11 @@ function _transpose(a: Array, dim: number): Array { } } -function _at(a: Array, idx1: Array, idx2: Array | null): Array { +function _at( + a: Array, + idx1: Array, + idx2: Array | null +): Array { // If there is a second index, fill a new array in position "N" with a[idx1[N]][idx2[N]] (2 Dims): if (idx2) { return Array(idx1.length) @@ -1721,7 +1753,12 @@ function _at(a: Array, idx1: Array, idx2: Array | null): Array | number, mask: Array | number, condition: (someArg: number) => boolean, value: number): Array | number { +function _masked_fill( + a: Array | number, + mask: Array | number, + condition: (someArg: number) => boolean, + value: number +): Array | number { // If a is a number, test "condition" on it. If not, recursive step to all of its elements: if (typeof mask === "number") { if (typeof a != "number") { @@ -1767,7 +1804,10 @@ export function _reshape(a: Array, shape: Array): Array { * @param {function} valueFunc - Function that returns number to fill up the Tensor. * @returns {object} New tensor. */ -function _tensorInitializer(shape: Array, valueFunc: () => number): Array { +function _tensorInitializer( + shape: Array, + valueFunc: () => number +): Array { if (shape.length === 1) { const emptyArray = Array(shape[0]).fill(0); return emptyArray.map(() => valueFunc()); @@ -1853,7 +1893,11 @@ export function rand(shape: Array, requires_grad = false): Tensor { * @param {boolean} xavier - Whether to use xavier initialization (divide by square root of first input dimension). * @returns {object} New tensor. 
*/ -export function randn(shape: Array, requires_grad = false, xavier = false): Tensor { +export function randn( + shape: Array, + requires_grad = false, + xavier = false +): Tensor { return new Tensor( _tensorInitializer(shape, () => { const mean = Math.random() + 0.00001; @@ -1879,12 +1923,17 @@ export function randn(shape: Array, requires_grad = false, xavier = fals * @param {boolean} requires_grad - Whether to keep track of this tensor's gradients. * @returns {object} New tensor. */ -export function randint(low = 0, high = 1, shape = [1], requires_grad = false): Tensor { +export function randint( + low = 0, + high = 1, + shape = [1], + requires_grad = false +): Tensor { return new Tensor( _tensorInitializer(shape, () => { return Math.floor(Math.random() * (high - low)) + low; }), - (requires_grad) + requires_grad ); } @@ -1917,7 +1966,10 @@ export function requiresGrad(a: Tensor | number | Array): boolean { * broadcast(ones([5,3,2]), ones([4,5,3,1])); */ export function broadcast(a: Tensor, b: Tensor): Tensor { - function _broadcast(out: Array | number, b: Array | number): Array | number { + function _broadcast( + out: Array | number, + b: Array | number + ): Array | number { if (typeof out === "number" && typeof b === "number") { return out; } else if (typeof out === "number" && b instanceof Array) { @@ -1928,7 +1980,6 @@ export function broadcast(a: Tensor, b: Tensor): Tensor { } else if (JSON.stringify(getShape(out)) === JSON.stringify(getShape(b))) { return out; } else if (out instanceof Array && b instanceof Array) { - // If both are tensors, we need to broadcast: const outShape = getShape(out); const bShape = getShape(b); @@ -1975,7 +2026,10 @@ export function broadcast(a: Tensor, b: Tensor): Tensor { } } else { // Define recursive function to find dimension with length 1: - const _broadcastSideways = (out: Array | number | null, b: Array): Array => { + const _broadcastSideways = ( + out: Array | number | null, + b: Array + ): Array => { if (out instanceof Array && b.length != out.length) { if (b.length === 1) { // Base case, contract existing dimension: @@ -2001,13 +2055,17 @@ export function broadcast(a: Tensor, b: Tensor): Tensor { return [null].map((element, idx) => _broadcastSideways(element, b[idx]) ); - } else {throw Error('Shapes not broadcastable.')} + } else { + throw Error("Shapes not broadcastable."); + } } }; // Return final broadcast tensor: return _broadcastSideways(out, b); } - } else { throw Error ("Shapes not broadcastable.")} + } else { + throw Error("Shapes not broadcastable."); + } } let out = a.data; @@ -2026,8 +2084,14 @@ export function broadcast(a: Tensor, b: Tensor): Tensor { * // Returns tensor with shape [4,2,3]: * broadcastUp(ones([2,3]), ones([4,3,2])); */ -export function broadcastUp(inElement: Array, outElement: Array): Array { - function _broadcastUp(inElement: Array, outElement: Array): Array { +export function broadcastUp( + inElement: Array, + outElement: Array +): Array { + function _broadcastUp( + inElement: Array, + outElement: Array + ): Array { if (getShape(inElement).length + 1 === getShape(outElement).length) { // Base case, create new dimension: const emptyArray = Array(outElement.length).fill(zeros); From 2f7255eae159aaa7161414d85355076193a5a584 Mon Sep 17 00:00:00 2001 From: pparke Date: Tue, 2 Apr 2024 19:52:36 -0300 Subject: [PATCH 8/9] added remaining class fields and moved comment blocks --- src/layers.ts | 75 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 31 deletions(-) diff --git 
a/src/layers.ts b/src/layers.ts index 0867925..4bf4f2e 100644 --- a/src/layers.ts +++ b/src/layers.ts @@ -301,13 +301,16 @@ export class Block extends Module { } // Embedding Layers + +/** + * Embedding class, turns indexes into vectors. + * + * @param {number} in_size - number of different indexes (vocabulary size). + * @param {number} out_size - size of the embedding vector generated. + */ export class Embedding extends Module { - /** - * Embedding class, turns indexes into vectors. - * - * @param {number} in_size - number of different indexes (vocabulary size). - * @param {number} out_size - size of the embedding vector generated. - */ + public E: Tensor; + constructor(in_size: number, embed_size: number) { super(); this.E = randn([in_size, embed_size], true, false); @@ -331,13 +334,15 @@ export class Embedding extends Module { } } +/** + * Embedding class, turns indexes into vectors. + * + * @param {number} n_timesteps - number of different embeddings (number of timesteps in each instance in batch). + * @param {number} embed_size - size of the embedding vector generated. + */ export class PositionalEmbedding extends Module { - /** - * Embedding class, turns indexes into vectors. - * - * @param {number} n_timesteps - number of different embeddings (number of timesteps in each instance in batch). - * @param {number} embed_size - size of the embedding vector generated. - */ + public E: Tensor; + constructor(n_timesteps: number, embed_size: number) { super(); this.E = randn([n_timesteps, embed_size], true, false); @@ -359,10 +364,11 @@ export class PositionalEmbedding extends Module { } // Non-linearity Layers: + +/** + * Rectified Linear Unit nonlinearity. Returns z if z>0 else 0. + */ export class ReLU extends Module { - /** - * Rectified Linear Unit nonlinearity. Returns z if z>0 else 0. - */ constructor() { super(); } @@ -396,10 +402,10 @@ export class ReLU extends Module { } } +/** + * Softmax nonlinearity class. Returns distribution of values (sum=1). + */ export class Softmax extends Module { - /** - * Softmax nonlinearity class. Returns distribution of values (sum=1). - */ constructor() { super(); } @@ -418,12 +424,15 @@ export class Softmax extends Module { } // Regularization Layers: + +/** + * Dropout class, added usually after other layers, to drop values to zero with given probability + * + * @param {number} drop_prob - probability to drop each value in input. + */ export class Dropout extends Module { - /** - * Dropout class, added usually after other layers, to drop values to zero with given probability - * - * @param {number} drop_prob - probability to drop each value in input. - */ + public p: number; + constructor(drop_prob: number) { super(); this.p = drop_prob; @@ -453,12 +462,15 @@ export class Dropout extends Module { } } +/** + * Layer Norm class, added usually after other layers to normalize across all of the output. + * + * @param {number} n_embed - size of the last dimention of the input. + */ export class LayerNorm extends Module { - /** - * Layer Norm class, added usually after other layers to normalize across all of the output. - * - * @param {number} n_embed - size of the last dimention of the input. - */ + public gamma: Tensor; + public beta: Tensor; + constructor(n_embed: number) { super(); this.gamma = ones([n_embed], true); @@ -474,10 +486,11 @@ export class LayerNorm extends Module { } // Loss layers: + +/** + * Cross Entropy Loss class, returns the loss given the output and the expected indexes. 
+ */ export class CrossEntropyLoss extends Module { - /** - * Cross Entropy Loss class, returns the loss given the output and the expected indexes. - */ constructor() { super(); } From 228e8e81fa1c946db030745e7366d49564d45559 Mon Sep 17 00:00:00 2001 From: pparke Date: Tue, 2 Apr 2024 19:59:10 -0300 Subject: [PATCH 9/9] move Adam class comment so it will show up on hover --- src/optim.ts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/optim.ts b/src/optim.ts index da60971..9687d5a 100644 --- a/src/optim.ts +++ b/src/optim.ts @@ -1,5 +1,13 @@ import { Parameter, Tensor, zeros } from "./tensor"; +/** + * Adam optimizer class. + * @param {(Parameter | Tensor)[]} params - List of all Parameter or Tensor (with requires_grad = True) to be optimized by Adam. "params" is usually set to nn.Module.parameters(), which automatically returns all parameters in a list form. + * @param {number} lr - Scalar multiplying each learning step, controls speed of learning. + * @param {number} reg - Scalar controling strength l2 regularization. + * @param {(number)[]} betas - Two scalar floats controling how slowly the optimizer changes the "m" and "v" attributes. + * @param {number} eps - Scalar added to denominator to stop it from ever going to zero. + */ export class Adam { // Declare Adam's types: params: (Parameter | Tensor)[]; @@ -9,14 +17,6 @@ export class Adam { b2: number; eps: number; - /** - * Adam optimizer class. - * @param {(Parameter | Tensor)[]} params - List of all Parameter or Tensor (with requires_grad = True) to be optimized by Adam. "params" is usually set to nn.Module.parameters(), which automatically returns all parameters in a list form. - * @param {number} lr - Scalar multiplying each learning step, controls speed of learning. - * @param {number} reg - Scalar controling strength l2 regularization. - * @param {(number)[]} betas - Two scalar floats controling how slowly the optimizer changes the "m" and "v" attributes. - * @param {number} eps - Scalar added to denominator to stop it from ever going to zero. - */ constructor( params: (Parameter | Tensor)[], lr = 1e-3,
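Taken together, the series declares explicit class fields on the layer classes, moves each JSDoc block above its class declaration so editors surface it on hover, and makes the second index argument of Tensor.at() optional. Declaring the fields up front gives them static types that the compiler and editor can check, instead of properties that only come into existence inside the constructor. Below is a minimal usage sketch of what this enables, assuming the constructors and the randn/at signatures shown in the hunks above are exported from src/layers.ts and src/tensor.ts as the diff paths suggest; the sizes and variable names are illustrative only, not taken from the patches.

import { MultiHeadSelfAttention, Linear } from "./layers";
import { Tensor, randn } from "./tensor";

// Class fields are now declared, so they are typed members rather than
// properties attached implicitly in the constructor:
const attn = new MultiHeadSelfAttention(64, 64, 4, 16, 0.2);
const proj: Linear = attn.residual_proj; // typed as Linear, not any

// JSDoc now sits above each class, so hovering MultiHeadSelfAttention
// (or Linear, Block, Dropout, ...) in an editor shows the @param docs.

// Tensor.at() accepts a single index list, matching its own doc example:
const a: Tensor = randn([3, 4], true, false);
const rows = a.at([0, 1, 0]);             // index2 is now optional
const elems = a.at([0, 1, 1], [2, 0, 2]); // two index lists still work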