Commit

Merge pull request #16 from pparke/help-with-types-1
Add class fields in layers.ts and move doc comments
eduardoleao052 authored Apr 3, 2024
2 parents a3789c5 + 228e8e8 commit e8c6802
Showing 3 changed files with 220 additions and 117 deletions.
167 changes: 103 additions & 64 deletions src/layers.ts
@@ -23,6 +23,7 @@ interface ModuleInterface {
train(): void;
eval(): void;
entries(): [string, Module | Parameter | Tensor | any][];
mode: "train" | "eval";
}

// Module class:
@@ -88,15 +89,20 @@ export class Module implements ModuleInterface {
}
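
The mode field added to ModuleInterface above is what train() and eval() toggle. A minimal, hedged sketch of how a consumer might rely on it; the "./layers" import path, the zero-argument Module constructor, and the propagation of mode to child modules are assumptions based on the surrounding code, not details confirmed by this diff:

import { Dropout, Linear, Module } from "./layers";

class TinyModel extends Module {
  public fc: Linear;
  public drop: Dropout;

  constructor() {
    super();
    this.fc = new Linear(8, 4);   // bias and Xavier init default to true
    this.drop = new Dropout(0.5); // drop half of the activations while training
  }
}

const model = new TinyModel();
model.train(); // presumably sets mode = "train" on the module and its children
model.eval();  // presumably sets mode = "eval", so layers like Dropout can become pass-throughs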

// Standard Layers:

/**
* Simple linear layer, with weight matrix and optional bias. Does not contain nonlinearity.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {boolean} bias - whether to include a bias term.
* @param {boolean} xavier - whether to use Xavier initialization (divide by the square root of the first input dimension).
*/
export class Linear extends Module {
/**
* Simple linear layer, with weight matrix and optional bias. Does not contain nonlinearity.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {boolean} bias - whether to include a bias term.
* @param {boolean} xavier - whether to use Xavier initialization (divide by the square root of the first input dimension).
*/
public W: Tensor;
public b: Tensor;
public has_bias: boolean;

constructor(in_size: number, out_size: number, bias = true, xavier = true) {
super();
this.W = randn([in_size, out_size], true, xavier);
@@ -118,16 +124,26 @@ export class Linear extends Module {
}
}
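
A hedged usage sketch for the Linear layer above; the forward(x) method, the "./tensor" import, and the randn argument order (shape, requires_grad, xavier) are inferred from the surrounding calls rather than shown in this diff:

import { Linear } from "./layers";
import { randn } from "./tensor";

const linear = new Linear(64, 32);      // bias = true, xavier = true by default
const x = randn([8, 64], false, false); // [batch, in_size]; assumed args: shape, requires_grad, xavier
const y = linear.forward(x);            // expected shape [8, 32], i.e. x @ W (+ b)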

/**
* Multi-head masked self-attention layer, the attention module at the core of the Transformer block.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size).
* @param {number} n_timesteps - length of the text sequence to be processed by the Transformer.
* @param {number} dropout_prob - probability of zeroing each activation in the Dropout layer.
*/
export class MultiHeadSelfAttention extends Module {
/**
* Multi-head masked self-attention layer, the attention module at the core of the Transformer block.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size).
* @param {number} n_timesteps - length of the text sequence to be processed by the Transformer.
* @param {number} dropout_prob - probability of zeroing each activation in the Dropout layer.
*/
public Wk: Linear;
public Wq: Linear;
public Wv: Linear;
public residual_proj: Linear;
public mask: Tensor;
public att_dropout: Dropout;
public residual_dropout: Dropout;
public softmax: Softmax;
public H: number;

constructor(
in_size: number,
out_size: number,
@@ -199,14 +215,19 @@
}
}
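
A hedged construction sketch for MultiHeadSelfAttention; the argument order past in_size and out_size follows the doc comment (the rest of the constructor is collapsed here), and forward(x) is an assumption:

import { MultiHeadSelfAttention } from "./layers";
import { randn } from "./tensor";

const att = new MultiHeadSelfAttention(
  64,  // in_size
  64,  // out_size
  4,   // n_heads, must evenly divide in_size (head size 64 / 4 = 16)
  16,  // n_timesteps, the sequence length the causal mask is built for
  0.1  // dropout_prob
);
const x = randn([2, 16, 64], false, false); // [batch, n_timesteps, in_size]
const out = att.forward(x);                 // attended output with the same leading dimensions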

/**
* Small block composed of two Linear layers, a ReLU non-linearity and a Dropout layer.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {number} dropout_prob - probability of zeroing each activation in the Dropout layer.
*/
export class FullyConnected extends Module {
/**
* Small block composed of two Linear layers, a ReLU non-linearity and a Dropout layer.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {number} dropout_prob - probability of zeroing each activation in the Dropout layer.
*/
public l1: Linear;
public relu: ReLU;
public l2: Linear;
public dropout: Dropout;

constructor(in_size: number, out_size: number, dropout_prob = 0) {
super();

@@ -230,16 +251,21 @@
}
}

/**
* Full Transformer decoder block, composed of Multi-Head Self-Attention, a Fully Connected block, and Layer Norms.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size).
* @param {number} n_timesteps - length of the text sequence to be processed by the Transformer.
* @param {number} dropout_prob - probability of zeroing each activation in the Dropout layer.
*/
export class Block extends Module {
/**
* Full Transformer decoder block, composed of Multi-Head Self-Attention, a Fully Connected block, and Layer Norms.
*
* @param {number} in_size - size of the last dimension of the input array.
* @param {number} out_size - size of the last dimension of the output array.
* @param {number} n_heads - number of parallel heads to be computed (must equally divide in_size).
* @param {number} n_timesteps - length of the text sequence to be processed by the Transformer.
* @param {number} dropout_prob - probability of zeroing each activation in the Dropout layer.
*/
public att: MultiHeadSelfAttention;
public ln1: LayerNorm;
public fcc: FullyConnected;
public ln2: LayerNorm;

constructor(
in_size: number,
out_size: number,
@@ -275,13 +301,16 @@
}
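
A hedged sketch of the decoder Block in use; it wires together the MultiHeadSelfAttention, FullyConnected and LayerNorm fields declared above (the exact wiring and residual connections live in the collapsed forward()):

import { Block } from "./layers";
import { randn } from "./tensor";

// Argument order assumed from the doc comment: in_size, out_size, n_heads, n_timesteps, dropout_prob.
const block = new Block(64, 64, 4, 16, 0.2);
const x = randn([2, 16, 64], false, false); // [batch, n_timesteps, in_size]
const y = block.forward(x);                 // same shape: attention and MLP sub-layers with layer norms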

// Embedding Layers

/**
* Embedding class, turns indexes into vectors.
*
* @param {number} in_size - number of different indexes (vocabulary size).
* @param {number} embed_size - size of the embedding vector generated.
*/
export class Embedding extends Module {
/**
* Embedding class, turns indexes into vectors.
*
* @param {number} in_size - number of different indexes (vocabulary size).
* @param {number} embed_size - size of the embedding vector generated.
*/
public E: Tensor;

constructor(in_size: number, embed_size: number) {
super();
this.E = randn([in_size, embed_size], true, false);
@@ -305,13 +334,15 @@
}
}

/**
* Positional embedding class, turns timestep positions into vectors.
*
* @param {number} n_timesteps - number of different embeddings (number of timesteps in each instance in batch).
* @param {number} embed_size - size of the embedding vector generated.
*/
export class PositionalEmbedding extends Module {
/**
* Positional embedding class, turns timestep positions into vectors.
*
* @param {number} n_timesteps - number of different embeddings (number of timesteps in each instance in batch).
* @param {number} embed_size - size of the embedding vector generated.
*/
public E: Tensor;

constructor(n_timesteps: number, embed_size: number) {
super();
this.E = randn([n_timesteps, embed_size], true, false);
@@ -333,10 +364,11 @@
}
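
A hedged sketch combining the two embedding layers above into the usual Transformer input pipeline; forward() and the element-wise add() are assumptions, so they are left as a comment:

import { Embedding, PositionalEmbedding } from "./layers";

const vocab_size = 1000;
const n_timesteps = 16;
const embed_size = 64;

const tok_emb = new Embedding(vocab_size, embed_size);            // one vector per vocabulary index
const pos_emb = new PositionalEmbedding(n_timesteps, embed_size); // one vector per timestep position

// Given an integer tensor `idx` of shape [batch, n_timesteps], the two outputs are
// typically summed element-wise: tok_emb.forward(idx).add(pos_emb.forward(idx)).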

// Non-linearity Layers:

/**
* Rectified Linear Unit nonlinearity. Returns z if z>0 else 0.
*/
export class ReLU extends Module {
/**
* Rectified Linear Unit nonlinearity. Returns z if z>0 else 0.
*/
constructor() {
super();
}
@@ -370,10 +402,10 @@ export class ReLU extends Module {
}
}

/**
* Softmax nonlinearity class. Returns distribution of values (sum=1).
*/
export class Softmax extends Module {
/**
* Softmax nonlinearity class. Returns distribution of values (sum=1).
*/
constructor() {
super();
}
@@ -392,12 +424,15 @@
}

// Regularization Layers:

/**
* Dropout class, usually added after other layers, to drop values to zero with a given probability.
*
* @param {number} drop_prob - probability of dropping each value in the input.
*/
export class Dropout extends Module {
/**
* Dropout class, usually added after other layers, to drop values to zero with a given probability.
*
* @param {number} drop_prob - probability of dropping each value in the input.
*/
public p: number;

constructor(drop_prob: number) {
super();
this.p = drop_prob;
@@ -427,12 +462,15 @@
}
}

/**
* Layer Norm class, usually added after other layers, to normalize the output across its last dimension.
*
* @param {number} n_embed - size of the last dimension of the input.
*/
export class LayerNorm extends Module {
/**
* Layer Norm class, usually added after other layers, to normalize the output across its last dimension.
*
* @param {number} n_embed - size of the last dimension of the input.
*/
public gamma: Tensor;
public beta: Tensor;

constructor(n_embed: number) {
super();
this.gamma = ones([n_embed], true);
@@ -448,10 +486,11 @@
}
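
A hedged sketch of the two regularization layers above; drop_prob and n_embed follow the doc comments, and the eval-mode behavior of Dropout is an assumption consistent with the mode field added to ModuleInterface:

import { Dropout, LayerNorm } from "./layers";
import { randn } from "./tensor";

const drop = new Dropout(0.2); // zero each activation with probability 0.2 while training
const ln = new LayerNorm(64);  // gamma and beta both have shape [64]

const x = randn([2, 16, 64], false, false);
const h = ln.forward(drop.forward(x)); // in eval mode, Dropout presumably passes x through unchanged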

// Loss layers:

/**
* Cross Entropy Loss class, returns the loss given the output and the expected indexes.
*/
export class CrossEntropyLoss extends Module {
/**
* Cross Entropy Loss class, returns the loss given the output and the expected indexes.
*/
constructor() {
super();
}
16 changes: 8 additions & 8 deletions src/optim.ts
@@ -1,5 +1,13 @@
import { Parameter, Tensor, zeros } from "./tensor";

/**
* Adam optimizer class.
* @param {(Parameter | Tensor)[]} params - List of all Parameter or Tensor (with requires_grad = true) to be optimized by Adam. "params" is usually set to nn.Module.parameters(), which automatically returns all parameters as a list.
* @param {number} lr - Scalar multiplying each learning step, controls the speed of learning.
* @param {number} reg - Scalar controlling the strength of L2 regularization.
* @param {number[]} betas - Two scalar floats controlling how slowly the optimizer changes the "m" and "v" attributes.
* @param {number} eps - Scalar added to the denominator to stop it from ever reaching zero.
*/
export class Adam {
// Declare Adam's types:
params: (Parameter | Tensor)[];
@@ -9,14 +17,6 @@ export class Adam {
b2: number;
eps: number;

/**
* Adam optimizer class.
* @param {(Parameter | Tensor)[]} params - List of all Parameter or Tensor (with requires_grad = true) to be optimized by Adam. "params" is usually set to nn.Module.parameters(), which automatically returns all parameters as a list.
* @param {number} lr - Scalar multiplying each learning step, controls the speed of learning.
* @param {number} reg - Scalar controlling the strength of L2 regularization.
* @param {number[]} betas - Two scalar floats controlling how slowly the optimizer changes the "m" and "v" attributes.
* @param {number} eps - Scalar added to the denominator to stop it from ever reaching zero.
*/
constructor(
params: (Parameter | Tensor)[],
lr = 1e-3,

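Tying the two files together, a hedged sketch of one training step with CrossEntropyLoss and Adam; the forward(), backward(), step() and zero_grad() calls and the import paths are assumptions based on the doc comments, not on code visible in this diff:

import { CrossEntropyLoss } from "./layers";
import { Adam } from "./optim";
import { Tensor } from "./tensor";

// Any model built from the layers above; only the members used here are declared.
declare const model: { parameters(): Tensor[]; forward(x: Tensor): Tensor };
declare const x: Tensor; // input batch
declare const y: Tensor; // expected indexes

const loss_fn = new CrossEntropyLoss();
// Argument order from the doc comment: params, lr, reg, betas, eps.
const optimizer = new Adam(model.parameters(), 1e-3, 0, [0.9, 0.99], 1e-9);

const z = model.forward(x);         // model output (logits)
const loss = loss_fn.forward(z, y); // loss against the expected indexes
loss.backward();                    // backpropagate (Tensor.backward() assumed)
optimizer.step();                   // Adam update of every parameter
optimizer.zero_grad();              // clear gradients before the next step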