Commit 3bfd068

update

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
FL03 committed May 25, 2024
1 parent 987ed28 commit 3bfd068
Showing 18 changed files with 145 additions and 85 deletions.
25 changes: 17 additions & 8 deletions core/src/nn/dropout.rs
@@ -11,7 +11,7 @@ use ndrand::{rand_distr::Bernoulli, RandomExt};
use num::traits::Num;

#[cfg(feature = "rand")]
pub fn dropout<A, S, D>(array: &ArrayBase<S, D>, p: f64) -> Array<A, D>
pub(crate) fn _dropout<A, S, D>(array: &ArrayBase<S, D>, p: f64) -> Array<A, D>
where
A: Num + ScalarOperand,
D: Dimension,
@@ -29,7 +29,7 @@ where
}

/// [Dropout] randomly zeroizes elements with a given probability (`p`).
pub trait Dropout {
pub trait DropOut {
type Output;

fn dropout(&self, p: f64) -> Self::Output;
@@ -44,15 +44,15 @@ pub trait Dropout {
/// - (p) Probability of dropping an element
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct DropoutLayer {
pub struct Dropout {
pub(crate) p: f64,
}

/*
************* Implementations *************
*/
#[cfg(feature = "rand")]
impl<A, S, D> Dropout for ArrayBase<S, D>
impl<A, S, D> DropOut for ArrayBase<S, D>
where
A: Num + ScalarOperand,
D: Dimension,
@@ -61,28 +61,37 @@ where
type Output = Array<A, D>;

fn dropout(&self, p: f64) -> Self::Output {
dropout(self, p)
_dropout(self, p)
}
}

impl DropoutLayer {
impl Dropout {
pub fn new(p: f64) -> Self {
Self { p }
}

pub fn apply<A, S, D>(&self, input: &ArrayBase<S, D>) -> Array<A, D>
where
A: Num + ScalarOperand,
D: Dimension,
S: DataOwned<Elem = A>,
{
_dropout(input, self.p)
}

pub fn scale(&self) -> f64 {
(1f64 - self.p).recip()
}
}

impl Default for DropoutLayer {
impl Default for Dropout {
fn default() -> Self {
Self::new(0.5)
}
}

#[cfg(feature = "rand")]
impl<A, S, D> Forward<ArrayBase<S, D>> for DropoutLayer
impl<A, S, D> Forward<ArrayBase<S, D>> for Dropout
where
A: Num + ScalarOperand,
D: Dimension,
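For reference, a minimal standalone sketch of the inverted-dropout behavior this module describes: zero each element with probability `p` and scale the survivors by `1 / (1 - p)` (the factor returned by `Dropout::scale`). The helper below is hypothetical — it is not the crate's `_dropout` — and it uses the `ndarray` and `ndarray-rand` crates directly rather than the `ndrand` re-export seen above.

```rust
use ndarray::{Array, Array2, Dimension};
use ndarray_rand::{rand_distr::Bernoulli, RandomExt};

// Hypothetical helper; mirrors the intent of `_dropout`, not its actual body.
fn dropout_sketch<D: Dimension>(input: &Array<f64, D>, p: f64) -> Array<f64, D> {
    // Keep each element with probability 1 - p ...
    let keep: Array<bool, D> =
        Array::random(input.raw_dim(), Bernoulli::new(1.0 - p).expect("p must lie in [0, 1]"));
    // ... and rescale the survivors by 1 / (1 - p) so the expected value is unchanged.
    let scale = (1f64 - p).recip();
    let mask = keep.mapv(|k| if k { scale } else { 0.0 });
    input * &mask
}

fn main() {
    let x = Array2::<f64>::ones((4, 4));
    let y = dropout_sketch(&x, 0.5);
    assert_eq!(y.raw_dim(), x.raw_dim()); // every element is now either 0.0 or 2.0
}
```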
4 changes: 2 additions & 2 deletions core/tests/nn.rs
@@ -1,7 +1,7 @@
#![allow(unused_imports)]
extern crate concision_core as concision;

use concision::nn::DropoutLayer;
use concision::nn::Dropout;
use concision::Forward;
use ndarray::prelude::*;

@@ -10,7 +10,7 @@ use ndarray::prelude::*;
fn test_dropout() {
let shape = (512, 2048);
let arr = Array2::<f64>::ones(shape);
let dropout = DropoutLayer::new(0.5);
let dropout = Dropout::new(0.5);
let out = dropout.forward(&arr);

assert!(arr.iter().all(|&x| x == 1.0));
@@ -9,13 +9,17 @@ pub(crate) fn dk(d_model: usize, heads: usize) -> usize {

#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct Config {
pub d_model: usize,
pub heads: usize,
pub struct AttentionConfig {
pub d_model: usize, // embedding size; default is 512
pub heads: usize, // number of heads; default is 8
}

impl Config {
pub fn new() -> ConfigBuilder {
impl AttentionConfig {
pub fn new(d_model: usize, heads: usize) -> Self {
Self { d_model, heads }
}
///
(Code scanning / clippy warning: empty doc comment)
pub fn create() -> ConfigBuilder {
ConfigBuilder::new()
}

@@ -32,7 +36,7 @@ impl Config {
}
}

impl Default for Config {
impl Default for AttentionConfig {
fn default() -> Self {
Self {
d_model: crate::D_MODEL,
@@ -42,7 +46,7 @@ impl Default for Config {
}

concision::builder! {
ConfigBuilder(Config) {
ConfigBuilder(AttentionConfig) {
d_model: usize,
heads: usize,
}
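For orientation, the relationship these defaults encode is d_k = d_model / heads: each head attends over an equal slice of the embedding. A purely illustrative sketch — only the `d_model = 512` and `heads = 8` defaults are taken from the comments above; the struct here is a stand-in, not the crate's `AttentionConfig`:

```rust
// Stand-in config; mirrors the d_k = d_model / heads relationship used by `dk` above.
struct AttnConfigSketch {
    d_model: usize, // embedding size; default 512
    heads: usize,   // number of attention heads; default 8
}

impl AttnConfigSketch {
    fn dk(&self) -> usize {
        // Each head works on a d_model / heads wide slice of the embedding.
        self.d_model / self.heads
    }
}

fn main() {
    let cfg = AttnConfigSketch { d_model: 512, heads: 8 };
    assert_eq!(cfg.dk(), 64); // 512 / 8
}
```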
23 changes: 12 additions & 11 deletions models/transformers/src/attention/head.rs
@@ -5,7 +5,7 @@
use super::{Score, _attention};
use crate::params::QkvBase;
use concision::getters;
use concision::nn::DropoutLayer;
use concision::nn::Dropout;
use nd::linalg::Dot;
use nd::*;
use num::complex::ComplexFloat;
@@ -16,13 +16,14 @@ use num::complex::ComplexFloat;
/// be flexible, relying upon the n-dimensional [QkvBase] to store the query, key, and value tensors.
/// Moreover, the head may be configured with optional dropout and/or masking layers.
///
/// ### Dropout
/// ### `dropout`
///
/// The [DropoutLayer] is an optional layer applied after the softmax function is applied to the
/// score. The layer is used to prevent overfitting by randomly setting a fraction of the input
/// The [Dropout] layer is an optional, conditionally enabled layer (requires the `rand` feature).
/// If enabled, the dropout layer is invoked after the softmax function is applied to the score.
/// The layer is used to prevent overfitting by randomly setting a fraction of the input
/// units to zero at each update during training time.
///
/// ### Masking
/// ### `mask`
///
/// After computing the dot-product of the query and key tensors, an optional mask may be applied to
/// the attention score. The mask is used to prevent the model from attending to certain parts of the
@@ -34,7 +35,7 @@ where
S: RawData<Elem = A>,
{
#[cfg(feature = "rand")]
pub(crate) dropout: Option<DropoutLayer>,
pub(crate) dropout: Option<Dropout>,
pub(crate) mask: Option<Array<bool, D>>,
pub(crate) params: QkvBase<S, D>,
}
@@ -48,7 +49,7 @@ where
A: Default,
S: DataOwned,
{
Self::from_params(QkvBase::new((dk, dm)))
Self::from_params(QkvBase::std(dk, dm))
}
}

@@ -115,7 +116,7 @@ where
}
/// Sets the dropout layer for the [AttentionHead]
#[cfg(feature = "rand")]
pub fn set_dropout(&mut self, dropout: Option<DropoutLayer>) {
pub fn set_dropout(&mut self, dropout: Option<Dropout>) {
self.dropout = dropout;
}
/// Sets the mask for the [AttentionHead]
@@ -124,7 +125,7 @@ }
}
/// Configure the [AttentionHead] with a [DropoutLayer]
#[cfg(feature = "rand")]
pub fn with_dropout(self, dropout: DropoutLayer) -> Self {
pub fn with_dropout(self, dropout: Dropout) -> Self {
Self {
dropout: Some(dropout),
..self
@@ -153,7 +154,7 @@ where
/// Returns an immutable reference to the optional [dropout](DropoutLayer) layer.
/// With the `rand` feature flag disabled, the dropout layer is
/// unavailable and returns `None`.
pub fn dropout(&self) -> Option<&DropoutLayer> {
pub fn dropout(&self) -> Option<&Dropout> {
self.dropout.as_ref()
}
}
@@ -168,7 +169,7 @@ where
/// With the `rand` feature flag disabled, the dropout layer is
/// unavailable and returns `None`.
#[cfg(not(feature = "rand"))]
pub fn dropout(&self) -> Option<&DropoutLayer> {
pub fn dropout(&self) -> Option<&Dropout> {
None
}
}
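For context, here is a minimal, self-contained sketch of the scaled dot-product attention an [AttentionHead] computes — softmax(Q·K^T / sqrt(d_k))·V, with the optional mask applied before the softmax and dropout (if configured) applied after it. This is illustrative only: it is restricted to 2-D arrays, does not use the crate's `QkvBase`, `Score`, or `_attention`, and the mask polarity (`true` = keep) is an assumption.

```rust
use ndarray::{Array2, Axis};

// Illustrative 2-D sketch; the crate's `_attention` is generic over dimension and storage.
fn attention_sketch(
    q: &Array2<f64>, // (n_queries, d_k)
    k: &Array2<f64>, // (n_keys, d_k)
    v: &Array2<f64>, // (n_keys, d_v)
    mask: Option<&Array2<bool>>,
) -> Array2<f64> {
    let dk = q.shape()[1] as f64;
    // Raw scores: Q · K^T / sqrt(d_k), shape (n_queries, n_keys).
    let mut scores = q.dot(&k.t()) / dk.sqrt();
    // Optional mask: positions marked `false` are pushed to -inf before the softmax.
    if let Some(m) = mask {
        scores.zip_mut_with(m, |s, &keep| {
            if !keep {
                *s = f64::NEG_INFINITY;
            }
        });
    }
    // Row-wise softmax (max-subtraction for numerical stability is omitted here).
    let mut weights = scores.mapv(f64::exp);
    let sums = weights.sum_axis(Axis(1)).insert_axis(Axis(1));
    weights = &weights / &sums;
    // A configured dropout layer would randomly zero a fraction of `weights` here,
    // during training only (see the dropout sketch earlier in this diff).
    weights.dot(v)
}

fn main() {
    let q = Array2::<f64>::ones((2, 4));
    let k = Array2::<f64>::ones((3, 4));
    let v = Array2::<f64>::ones((3, 8));
    let out = attention_sketch(&q, &k, &v, None);
    assert_eq!(out.dim(), (2, 8));
}
```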
12 changes: 6 additions & 6 deletions models/transformers/src/attention/mod.rs
@@ -9,10 +9,10 @@
//! the Transformer model, primarily due to its capabilities in natural language
//! processing (NLP) domains
pub(crate) use self::_impl_methods::*;
pub use self::head::AttentionHead;
pub use self::score::Score;
pub use self::{config::AttentionConfig, head::AttentionHead, score::Score};
pub use self::utils::*;

pub(crate) mod config;
pub(crate) mod head;
pub(crate) mod score;

@@ -34,7 +34,7 @@ pub trait Attention {

pub(crate) mod utils {
use super::Score;
use concision::nn::DropoutLayer;
use concision::nn::Dropout;
use nd::linalg::Dot;
use nd::prelude::*;
use num::complex::ComplexFloat;
@@ -45,7 +45,7 @@ pub(crate) mod utils {
k: &ArrayBase<S, D>,
v: &ArrayBase<S, D>,
mask: Option<&Array<bool, D>>,
dropout: Option<&DropoutLayer>,
dropout: Option<&Dropout>,
) -> Score<A, D>
where
A: ComplexFloat + nd::ScalarOperand,
@@ -60,7 +60,7 @@

mod _impl_methods {
use super::Score;
use concision::prelude::{DropoutLayer, MaskFill, Softmax};
use concision::prelude::{Dropout, MaskFill, Softmax};
use nd::linalg::Dot;
use nd::prelude::*;
use num::complex::ComplexFloat;
@@ -70,7 +70,7 @@ mod _impl_methods {
k: &ArrayBase<S, D>,
v: &ArrayBase<S, D>,
mask: Option<&Array<bool, D>>,
dropout: Option<&DropoutLayer>,
dropout: Option<&Dropout>,
) -> Score<A, D>
where
A: ComplexFloat + nd::ScalarOperand,
5 changes: 2 additions & 3 deletions models/transformers/src/attention/multi/mod.rs
@@ -5,12 +5,11 @@
//! # Multi-Head Attention
//!
//!
pub use self::{config::Config, multi_head::*};
pub use self::multi_head::*;

pub(crate) mod config;
// pub(crate) mod config;
pub(crate) mod multi_head;

pub(crate) mod prelude {
pub use super::config::Config as MultiHeadAttentionConfig;
pub use super::multi_head::MultiHeadAttention;
}
11 changes: 5 additions & 6 deletions models/transformers/src/attention/multi/multi_head.rs
@@ -2,8 +2,7 @@
Appellation: multi_head <module>
Contrib: FL03 <jo3mccain@icloud.com>
*/
use super::Config;
use crate::AttentionHead;
use crate::{attention::AttentionConfig, AttentionHead};
use linear::{Biased, Linear};
use nd::prelude::*;
use nd::{DataOwned, OwnedRepr, RawData};
@@ -13,7 +12,7 @@ where
D: Dimension,
S: RawData<Elem = A>,
{
pub(crate) config: Config,
pub(crate) config: AttentionConfig,
pub(crate) head: AttentionHead<A, D, S>,
pub(crate) linears: Vec<Linear<A, Biased, D, S>>,
}
@@ -23,7 +22,7 @@ where
D: Dimension,
S: RawData<Elem = A>,
{
pub const fn config(&self) -> &Config {
pub const fn config(&self) -> &AttentionConfig {
&self.config
}

@@ -49,7 +48,7 @@
A: Clone + Default,
S: DataOwned,
{
let config = Config::new().d_model(d_model).heads(heads).build();
let config = AttentionConfig::new(d_model, heads);
let linears = (0..4)
.map(|_| Linear::from_features(d_model, d_model))
.collect();
@@ -69,7 +68,7 @@
{
fn default() -> Self {
Self {
config: Config::default(),
config: AttentionConfig::default(),
head: AttentionHead::default(),
linears: Vec::new(),
}
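As an aside on the four `Linear` layers created in `std` above: they correspond to the usual query, key, value, and output projections, each mapping d_model → d_model, and each projected tensor is then viewed as `heads` slices of width d_k = d_model / heads. A hypothetical shape-only sketch of that split (none of these names come from the crate):

```rust
use ndarray::{Array2, Array3};

// Shape bookkeeping only: (seq, d_model) -> (seq, heads, d_k).
// A real implementation would also permute the axes so that `heads` leads.
fn split_heads(x: Array2<f64>, heads: usize) -> Array3<f64> {
    let (seq, d_model) = x.dim();
    let dk = d_model / heads;
    x.into_shape((seq, heads, dk))
        .expect("d_model must be divisible by heads")
}

fn main() {
    let (seq, d_model, heads) = (10, 512, 8);
    let x = Array2::<f64>::zeros((seq, d_model));
    let xh = split_heads(x, heads);
    assert_eq!(xh.dim(), (10, 8, 64)); // 512 = 8 heads × 64
}
```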
1 change: 0 additions & 1 deletion models/transformers/src/attention/score.rs
@@ -33,7 +33,6 @@ where
pub fn into_score(self) -> Array<A, D> {
self.score
}

/// Retrieve the attention tensor.
pub fn attention(&self) -> &Array<A, D> {
&self.attention
46 changes: 46 additions & 0 deletions models/transformers/src/config/mod.rs
@@ -0,0 +1,46 @@
/*
Appellation: config <module>
Contrib: FL03 <jo3mccain@icloud.com>
*/


pub struct TransformerConfig {
pub heads: usize,
}

pub struct Features {
pub d_model: usize,
}

pub struct QkvShape {
pub dq: usize,
pub dk: usize,
pub dv: usize,
}

impl QkvShape {
pub fn new(dq: usize, dk: usize, dv: usize) -> Self {
Self {
dq,
dk,
dv,
}
}

pub fn std(dk: usize) -> Self {
let (dq, dv) = (dk, dk);

Self::new(dq, dk, dv)
}
}


pub struct EmbedConfig {}

pub struct FFNConfig {}
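One note on `QkvShape::std`: tying `dq = dv = dk` matches the usual convention — the query and key widths must agree for Q·K^T to be defined at all, and the value width is conventionally set to the same size. A small, self-contained illustration of the d_q = d_k constraint (names here are illustrative, not the crate's API):

```rust
use ndarray::Array2;

fn main() {
    // Q: (n, d_q), K: (m, d_k). Q · K^T is only well-defined when d_q == d_k,
    // which is why the "standard" shape constructor ties the two together.
    let (n, m, d) = (4, 6, 64);
    let q = Array2::<f64>::zeros((n, d));
    let k = Array2::<f64>::zeros((m, d));
    let scores = q.dot(&k.t()); // (n, m) attention score matrix
    assert_eq!(scores.dim(), (4, 6));
}
```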
7 changes: 0 additions & 7 deletions models/transformers/src/ffn/mod.rs

This file was deleted.
