Commit

Merge pull request #16 from pparke/help-with-types-1
Add class fields in layers.ts and move doc comments
eduardoleao052 authored Apr 3, 2024
2 parents a3789c5 + 228e8e8 commit e8c6802
Showing 3 changed files with 220 additions and 117 deletions.
167 changes: 103 additions & 64 deletions src/layers.ts
@@ -23,6 +23,7 @@ interface ModuleInterface {
train(): void;
eval(): void;
entries(): [string, Module | Parameter | Tensor | any][];
mode: "train" | "eval";
}

// Module class:
@@ -88,15 +89,20 @@ export class Module implements ModuleInterface {
}
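
The mode field added to ModuleInterface above is what train() and eval() toggle. A minimal, hedged sketch of how a consumer might rely on it; the "./layers" import path, the zero-argument Module constructor, and the propagation of mode to child modules are assumptions based on the surrounding code, not details confirmed by this diff:

import { Dropout, Linear, Module } from "./layers";

class TinyModel extends Module {
  public fc: Linear;
  public drop: Dropout;

  constructor() {
    super();
    this.fc = new Linear(8, 4);   // bias and Xavier init default to true
    this.drop = new Dropout(0.5); // drop half of the activations while training
  }
}

const model = new TinyModel();
model.train(); // presumably sets mode = "train" on the module and its children
model.eval();  // presumably sets mode = "eval", so layers like Dropout can become pass-throughs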

// Standard Layers:

/**
* Simple linear layer, with weight matrix and optional bias. Does not contain nonlinearity.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {boolean} bias - whether to include a bias term.
* @param {boolean} xavier - whether to use Xavier initialization (divide by the square root of the first input dimension).
*/
export class Linear extends Module {
/**
* Simple linear layer, with weight matrix and optional bias. Does not contain nonlinearity.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {boolean} bias - whether to include a bias term.
* @param {boolean} xavier - whether to use Xavier initialization (divide by the square root of the first input dimension).
*/
public W: Tensor;
public b: Tensor;
public has_bias: boolean;

constructor(in_size: number, out_size: number, bias = true, xavier = true) {
super();
this.W = randn([in_size, out_size], true, xavier);
@@ -118,16 +124,26 @@ export class Linear extends Module {
}
}
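
A hedged usage sketch for the Linear layer above; the forward(x) method, the "./tensor" import, and the randn argument order (shape, requires_grad, xavier) are inferred from the surrounding calls rather than shown in this diff:

import { Linear } from "./layers";
import { randn } from "./tensor";

const linear = new Linear(64, 32);      // bias = true, xavier = true by default
const x = randn([8, 64], false, false); // [batch, in_size]; assumed args: shape, requires_grad, xavier
const y = linear.forward(x);            // expected shape [8, 32], i.e. x @ W (+ b)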

/**
* Multi-head masked self-attention layer, the attention module at the core of the Transformer block.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size).
* @param {number} n_timesteps - length of the text sequence to be processed by the Transformer.
* @param {number} dropout_prob - probability of zeroing each activation in the Dropout layer.
*/
export class MultiHeadSelfAttention extends Module {
/**
* Multi-head masked self-attention layer, the attention module at the core of the Transformer block.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size).
* @param {number} n_timesteps - length of the text sequence to be processed by the Transformer.
* @param {number} dropout_prob - probability of zeroing each activation in the Dropout layer.
*/
public Wk: Linear;
public Wq: Linear;
public Wv: Linear;
public residual_proj: Linear;
public mask: Tensor;
public att_dropout: Dropout;
public residual_dropout: Dropout;
public softmax: Softmax;
public H: number;

constructor(
in_size: number,
out_size: number,
@@ -199,14 +215,19 @@
}
}
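
A hedged construction sketch for MultiHeadSelfAttention; the argument order past in_size and out_size follows the doc comment (the rest of the constructor is collapsed here), and forward(x) is an assumption:

import { MultiHeadSelfAttention } from "./layers";
import { randn } from "./tensor";

const att = new MultiHeadSelfAttention(
  64,  // in_size
  64,  // out_size
  4,   // n_heads, must evenly divide in_size (head size 64 / 4 = 16)
  16,  // n_timesteps, the sequence length the causal mask is built for
  0.1  // dropout_prob
);
const x = randn([2, 16, 64], false, false); // [batch, n_timesteps, in_size]
const out = att.forward(x);                 // attended output with the same leading dimensions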

/**
* Small block composed of two Linear layers, a ReLU non-linearity and a Dropout layer.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {number} dropout_prob - probability of zeroing each activation in the Dropout layer.
*/
export class FullyConnected extends Module {
/**
* Small block composed of two Linear layers, a ReLU non-linearity and a Dropout layer.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {number} dropout_prob - probability of zeroing each activation in the Dropout layer.
*/
public l1: Linear;
public relu: ReLU;
public l2: Linear;
public dropout: Dropout;

constructor(in_size: number, out_size: number, dropout_prob = 0) {
super();

@@ -230,16 +251,21 @@
}
}

/**
* Full Transformer decoder block, composed of Multi-Head Self-Attention, a Fully Connected block, and Layer Norms.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size).
* @param {number} n_timesteps - length of the text sequence to be processed by the Transformer.
* @param {number} dropout_prob - probability of zeroing each activation in the Dropout layer.
*/
export class Block extends Module {
/**
* Full Transformer decoder block, composed of Multi-Head Self-Attention, a Fully Connected block, and Layer Norms.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size).
* @param {number} n_timesteps - length of the text sequence to be processed by the Transformer.
* @param {number} dropout_prob - probability of zeroing each activation in the Dropout layer.
*/
public att: MultiHeadSelfAttention;
public ln1: LayerNorm;
public fcc: FullyConnected;
public ln2: LayerNorm;

constructor(
in_size: number,
out_size: number,
@@ -275,13 +301,16 @@
}
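
A hedged sketch of the decoder Block in use; it wires together the MultiHeadSelfAttention, FullyConnected and LayerNorm fields declared above (the exact wiring and residual connections live in the collapsed forward()):

import { Block } from "./layers";
import { randn } from "./tensor";

// Argument order assumed from the doc comment: in_size, out_size, n_heads, n_timesteps, dropout_prob.
const block = new Block(64, 64, 4, 16, 0.2);
const x = randn([2, 16, 64], false, false); // [batch, n_timesteps, in_size]
const y = block.forward(x);                 // same shape: attention and MLP sub-layers with layer norms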

// Embedding Layers

/**
* Embedding class, turns indexes into vectors.
*
* @param {number} in_size - number of different indexes (vocabulary size).
* @param {number} embed_size - size of the embedding vector generated.
*/
export class Embedding extends Module {
/**
* Embedding class, turns indexes into vectors.
*
* @param {number} in_size - number of different indexes (vocabulary size).
* @param {number} embed_size - size of the embedding vector generated.
*/
public E: Tensor;

constructor(in_size: number, embed_size: number) {
super();
this.E = randn([in_size, embed_size], true, false);
@@ -305,13 +334,15 @@
}
}

/**
* Positional embedding class, turns timestep positions into vectors.
*
* @param {number} n_timesteps - number of different embeddings (number of timesteps in each instance in batch).
* @param {number} embed_size - size of the embedding vector generated.
*/
export class PositionalEmbedding extends Module {
/**
* Positional embedding class, turns timestep positions into vectors.
*
* @param {number} n_timesteps - number of different embeddings (number of timesteps in each instance in batch).
* @param {number} embed_size - size of the embedding vector generated.
*/
public E: Tensor;

constructor(n_timesteps: number, embed_size: number) {
super();
this.E = randn([n_timesteps, embed_size], true, false);
@@ -333,10 +364,11 @@
}
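
A hedged sketch combining the two embedding layers above into the usual Transformer input pipeline; forward() and the element-wise add() are assumptions, so they are left as a comment:

import { Embedding, PositionalEmbedding } from "./layers";

const vocab_size = 1000;
const n_timesteps = 16;
const embed_size = 64;

const tok_emb = new Embedding(vocab_size, embed_size);            // one vector per vocabulary index
const pos_emb = new PositionalEmbedding(n_timesteps, embed_size); // one vector per timestep position

// Given an integer tensor `idx` of shape [batch, n_timesteps], the two outputs are
// typically summed element-wise: tok_emb.forward(idx).add(pos_emb.forward(idx)).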

// Non-linearity Layers:

/**
* Rectified Linear Unit nonlinearity. Returns z if z>0 else 0.
*/
export class ReLU extends Module {
/**
* Rectified Linear Unit nonlinearity. Returns z if z>0 else 0.
*/
constructor() {
super();
}
@@ -370,10 +402,10 @@ export class ReLU extends Module {
}
}

/**
* Softmax nonlinearity class. Returns distribution of values (sum=1).
*/
export class Softmax extends Module {
/**
* Softmax nonlinearity class. Returns distribution of values (sum=1).
*/
constructor() {
super();
}
@@ -392,12 +424,15 @@
}

// Regularization Layers:

/**
* Dropout class, usually added after other layers, to drop values to zero with a given probability.
*
* @param {number} drop_prob - probability of dropping each value in the input.
*/
export class Dropout extends Module {
/**
* Dropout class, usually added after other layers, to drop values to zero with a given probability.
*
* @param {number} drop_prob - probability of dropping each value in the input.
*/
public p: number;

constructor(drop_prob: number) {
super();
this.p = drop_prob;
@@ -427,12 +462,15 @@
}
}

/**
* Layer Norm class, usually added after other layers, to normalize the output across its last dimension.
*
* @param {number} n_embed - size of the last dimension of the input.
*/
export class LayerNorm extends Module {
/**
* Layer Norm class, usually added after other layers, to normalize the output across its last dimension.
*
* @param {number} n_embed - size of the last dimension of the input.
*/
public gamma: Tensor;
public beta: Tensor;

constructor(n_embed: number) {
super();
this.gamma = ones([n_embed], true);
@@ -448,10 +486,11 @@
}
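
A hedged sketch of the two regularization layers above; drop_prob and n_embed follow the doc comments, and the eval-mode behavior of Dropout is an assumption consistent with the mode field added to ModuleInterface:

import { Dropout, LayerNorm } from "./layers";
import { randn } from "./tensor";

const drop = new Dropout(0.2); // zero each activation with probability 0.2 while training
const ln = new LayerNorm(64);  // gamma and beta both have shape [64]

const x = randn([2, 16, 64], false, false);
const h = ln.forward(drop.forward(x)); // in eval mode, Dropout presumably passes x through unchanged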

// Loss layers:

/**
* Cross Entropy Loss class, returns the loss given the output and the expected indexes.
*/
export class CrossEntropyLoss extends Module {
/**
* Cross Entropy Loss class, returns the loss given the output and the expected indexes.
*/
constructor() {
super();
}
16 changes: 8 additions & 8 deletions src/optim.ts
@@ -1,5 +1,13 @@
import { Parameter, Tensor, zeros } from "./tensor";

/**
* Adam optimizer class.
* @param {(Parameter | Tensor)[]} params - List of all Parameter or Tensor (with requires_grad = true) to be optimized by Adam. "params" is usually set to nn.Module.parameters(), which automatically returns all parameters as a list.
* @param {number} lr - Scalar multiplying each learning step, controls the speed of learning.
* @param {number} reg - Scalar controlling the strength of L2 regularization.
* @param {number[]} betas - Two scalar floats controlling how slowly the optimizer changes the "m" and "v" attributes.
* @param {number} eps - Scalar added to the denominator to stop it from ever reaching zero.
*/
export class Adam {
// Declare Adam's types:
params: (Parameter | Tensor)[];
@@ -9,14 +17,6 @@ export class Adam {
b2: number;
eps: number;

/**
* Adam optimizer class.
* @param {(Parameter | Tensor)[]} params - List of all Parameter or Tensor (with requires_grad = true) to be optimized by Adam. "params" is usually set to nn.Module.parameters(), which automatically returns all parameters as a list.
* @param {number} lr - Scalar multiplying each learning step, controls the speed of learning.
* @param {number} reg - Scalar controlling the strength of L2 regularization.
* @param {number[]} betas - Two scalar floats controlling how slowly the optimizer changes the "m" and "v" attributes.
* @param {number} eps - Scalar added to the denominator to stop it from ever reaching zero.
*/
constructor(
params: (Parameter | Tensor)[],
lr = 1e-3,

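Tying the two files together, a hedged sketch of one training step with CrossEntropyLoss and Adam; the forward(), backward(), step() and zero_grad() calls and the import paths are assumptions based on the doc comments, not on code visible in this diff:

import { CrossEntropyLoss } from "./layers";
import { Adam } from "./optim";
import { Tensor } from "./tensor";

// Any model built from the layers above; only the members used here are declared.
declare const model: { parameters(): Tensor[]; forward(x: Tensor): Tensor };
declare const x: Tensor; // input batch
declare const y: Tensor; // expected indexes

const loss_fn = new CrossEntropyLoss();
// Argument order from the doc comment: params, lr, reg, betas, eps.
const optimizer = new Adam(model.parameters(), 1e-3, 0, [0.9, 0.99], 1e-9);

const z = model.forward(x);         // model output (logits)
const loss = loss_fn.forward(z, y); // loss against the expected indexes
loss.backward();                    // backpropagate (Tensor.backward() assumed)
optimizer.step();                   // Adam update of every parameter
optimizer.zero_grad();              // clear gradients before the next step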