Commit 3bfd068

update

Signed-off-by: Joe McCain III <jo3mccain@icloud.com>
FL03 committed May 25, 2024
1 parent 987ed28 commit 3bfd068
Showing 18 changed files with 145 additions and 85 deletions.
25 changes: 17 additions & 8 deletions core/src/nn/dropout.rs
@@ -11,7 +11,7 @@ use ndrand::{rand_distr::Bernoulli, RandomExt};
use num::traits::Num;

#[cfg(feature = "rand")]
pub fn dropout<A, S, D>(array: &ArrayBase<S, D>, p: f64) -> Array<A, D>
pub(crate) fn _dropout<A, S, D>(array: &ArrayBase<S, D>, p: f64) -> Array<A, D>
where
A: Num + ScalarOperand,
D: Dimension,
@@ -29,7 +29,7 @@ where
}

/// [Dropout] randomly zeroizes elements with a given probability (`p`).
pub trait Dropout {
pub trait DropOut {
type Output;

fn dropout(&self, p: f64) -> Self::Output;
@@ -44,15 +44,15 @@ pub trait Dropout {
/// - (p) Probability of dropping an element
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct DropoutLayer {
pub struct Dropout {
pub(crate) p: f64,
}

/*
************* Implementations *************
*/
#[cfg(feature = "rand")]
impl<A, S, D> Dropout for ArrayBase<S, D>
impl<A, S, D> DropOut for ArrayBase<S, D>
where
A: Num + ScalarOperand,
D: Dimension,
@@ -61,28 +61,37 @@ where
type Output = Array<A, D>;

fn dropout(&self, p: f64) -> Self::Output {
dropout(self, p)
_dropout(self, p)
}
}

impl DropoutLayer {
impl Dropout {
pub fn new(p: f64) -> Self {
Self { p }
}

pub fn apply<A, S, D>(&self, input: &ArrayBase<S, D>) -> Array<A, D>
where
A: Num + ScalarOperand,
D: Dimension,
S: DataOwned<Elem = A>,
{
_dropout(input, self.p)
}

pub fn scale(&self) -> f64 {
(1f64 - self.p).recip()
}
}

impl Default for DropoutLayer {
impl Default for Dropout {
fn default() -> Self {
Self::new(0.5)
}
}

#[cfg(feature = "rand")]
impl<A, S, D> Forward<ArrayBase<S, D>> for DropoutLayer
impl<A, S, D> Forward<ArrayBase<S, D>> for Dropout
where
A: Num + ScalarOperand,
D: Dimension,
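For reference, a minimal standalone sketch of the inverted-dropout behavior this module describes: zero each element with probability `p` and scale the survivors by `1 / (1 - p)` (the factor returned by `Dropout::scale`). The helper below is hypothetical — it is not the crate's `_dropout` — and it uses the `ndarray` and `ndarray-rand` crates directly rather than the `ndrand` re-export seen above.

```rust
use ndarray::{Array, Array2, Dimension};
use ndarray_rand::{rand_distr::Bernoulli, RandomExt};

// Hypothetical helper; mirrors the intent of `_dropout`, not its actual body.
fn dropout_sketch<D: Dimension>(input: &Array<f64, D>, p: f64) -> Array<f64, D> {
    // Keep each element with probability 1 - p ...
    let keep: Array<bool, D> =
        Array::random(input.raw_dim(), Bernoulli::new(1.0 - p).expect("p must lie in [0, 1]"));
    // ... and rescale the survivors by 1 / (1 - p) so the expected value is unchanged.
    let scale = (1f64 - p).recip();
    let mask = keep.mapv(|k| if k { scale } else { 0.0 });
    input * &mask
}

fn main() {
    let x = Array2::<f64>::ones((4, 4));
    let y = dropout_sketch(&x, 0.5);
    assert_eq!(y.raw_dim(), x.raw_dim()); // every element is now either 0.0 or 2.0
}
```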
4 changes: 2 additions & 2 deletions core/tests/nn.rs
@@ -1,7 +1,7 @@
#![allow(unused_imports)]
extern crate concision_core as concision;

use concision::nn::DropoutLayer;
use concision::nn::Dropout;
use concision::Forward;
use ndarray::prelude::*;

@@ -10,7 +10,7 @@ use ndarray::prelude::*;
fn test_dropout() {
let shape = (512, 2048);
let arr = Array2::<f64>::ones(shape);
let dropout = DropoutLayer::new(0.5);
let dropout = Dropout::new(0.5);
let out = dropout.forward(&arr);

assert!(arr.iter().all(|&x| x == 1.0));
@@ -9,13 +9,17 @@ pub(crate) fn dk(d_model: usize, heads: usize) -> usize {

#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct Config {
pub d_model: usize,
pub heads: usize,
pub struct AttentionConfig {
pub d_model: usize, // embedding size; default is 512
pub heads: usize, // number of heads; default is 8
}

impl Config {
pub fn new() -> ConfigBuilder {
impl AttentionConfig {
pub fn new(d_model: usize, heads: usize) -> Self {
Self { d_model, heads }
}
///
(Code scanning / clippy warning: empty doc comment)
pub fn create() -> ConfigBuilder {
ConfigBuilder::new()
}

@@ -32,7 +36,7 @@ impl Config {
}
}

impl Default for Config {
impl Default for AttentionConfig {
fn default() -> Self {
Self {
d_model: crate::D_MODEL,
@@ -42,7 +46,7 @@ impl Default for Config {
}

concision::builder! {
ConfigBuilder(Config) {
ConfigBuilder(AttentionConfig) {
d_model: usize,
heads: usize,
}
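For orientation, the relationship these defaults encode is d_k = d_model / heads: each head attends over an equal slice of the embedding. A purely illustrative sketch — only the `d_model = 512` and `heads = 8` defaults are taken from the comments above; the struct here is a stand-in, not the crate's `AttentionConfig`:

```rust
// Stand-in config; mirrors the d_k = d_model / heads relationship used by `dk` above.
struct AttnConfigSketch {
    d_model: usize, // embedding size; default 512
    heads: usize,   // number of attention heads; default 8
}

impl AttnConfigSketch {
    fn dk(&self) -> usize {
        // Each head works on a d_model / heads wide slice of the embedding.
        self.d_model / self.heads
    }
}

fn main() {
    let cfg = AttnConfigSketch { d_model: 512, heads: 8 };
    assert_eq!(cfg.dk(), 64); // 512 / 8
}
```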
23 changes: 12 additions & 11 deletions models/transformers/src/attention/head.rs
@@ -5,7 +5,7 @@
use super::{Score, _attention};
use crate::params::QkvBase;
use concision::getters;
use concision::nn::DropoutLayer;
use concision::nn::Dropout;
use nd::linalg::Dot;
use nd::*;
use num::complex::ComplexFloat;
@@ -16,13 +16,14 @@ use num::complex::ComplexFloat;
/// be flexible, relying upon the n-dimensional [QkvBase] to store the query, key, and value tensors.
/// Moreover, the head may be configured with optional dropout and/or masking layers.
///
/// ### Dropout
/// ### `dropout`
///
/// The [DropoutLayer] is an optional layer applied after the softmax function is applied to the
/// score. The layer is used to prevent overfitting by randomly setting a fraction of the input
/// The [Dropout] layer is an optional, conditionally enabled layer (requires the `rand` feature).
/// If enabled, the dropout layer is invoked after the softmax function is applied to the score.
/// The layer is used to prevent overfitting by randomly setting a fraction of the input
/// units to zero at each update during training time.
///
/// ### Masking
/// ### `mask`
///
/// After computing the dot-product of the query and key tensors, an optional mask may be applied to
/// the attention score. The mask is used to prevent the model from attending to certain parts of the
@@ -34,7 +35,7 @@ where
S: RawData<Elem = A>,
{
#[cfg(feature = "rand")]
pub(crate) dropout: Option<DropoutLayer>,
pub(crate) dropout: Option<Dropout>,
pub(crate) mask: Option<Array<bool, D>>,
pub(crate) params: QkvBase<S, D>,
}
@@ -48,7 +49,7 @@ where
A: Default,
S: DataOwned,
{
Self::from_params(QkvBase::new((dk, dm)))
Self::from_params(QkvBase::std(dk, dm))
}
}

@@ -115,7 +116,7 @@ where
}
/// Sets the dropout layer for the [AttentionHead]
#[cfg(feature = "rand")]
pub fn set_dropout(&mut self, dropout: Option<DropoutLayer>) {
pub fn set_dropout(&mut self, dropout: Option<Dropout>) {
self.dropout = dropout;
}
/// Sets the mask for the [AttentionHead]
@@ -124,7 +125,7 @@ }
}
/// Configure the [AttentionHead] with a [DropoutLayer]
#[cfg(feature = "rand")]
pub fn with_dropout(self, dropout: DropoutLayer) -> Self {
pub fn with_dropout(self, dropout: Dropout) -> Self {
Self {
dropout: Some(dropout),
..self
@@ -153,7 +154,7 @@ where
/// Returns an immutable reference to the optional [dropout](DropoutLayer) layer.
/// With the `rand` feature flag disabled, the dropout layer is
/// unavailable and returns `None`.
pub fn dropout(&self) -> Option<&DropoutLayer> {
pub fn dropout(&self) -> Option<&Dropout> {
self.dropout.as_ref()
}
}
@@ -168,7 +169,7 @@ where
/// With the `rand` feature flag disabled, the dropout layer is
/// unavailable and returns `None`.
#[cfg(not(feature = "rand"))]
pub fn dropout(&self) -> Option<&DropoutLayer> {
pub fn dropout(&self) -> Option<&Dropout> {
None
}
}
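For context, here is a minimal, self-contained sketch of the scaled dot-product attention an [AttentionHead] computes — softmax(Q·K^T / sqrt(d_k))·V, with the optional mask applied before the softmax and dropout (if configured) applied after it. This is illustrative only: it is restricted to 2-D arrays, does not use the crate's `QkvBase`, `Score`, or `_attention`, and the mask polarity (`true` = keep) is an assumption.

```rust
use ndarray::{Array2, Axis};

// Illustrative 2-D sketch; the crate's `_attention` is generic over dimension and storage.
fn attention_sketch(
    q: &Array2<f64>, // (n_queries, d_k)
    k: &Array2<f64>, // (n_keys, d_k)
    v: &Array2<f64>, // (n_keys, d_v)
    mask: Option<&Array2<bool>>,
) -> Array2<f64> {
    let dk = q.shape()[1] as f64;
    // Raw scores: Q · K^T / sqrt(d_k), shape (n_queries, n_keys).
    let mut scores = q.dot(&k.t()) / dk.sqrt();
    // Optional mask: positions marked `false` are pushed to -inf before the softmax.
    if let Some(m) = mask {
        scores.zip_mut_with(m, |s, &keep| {
            if !keep {
                *s = f64::NEG_INFINITY;
            }
        });
    }
    // Row-wise softmax (max-subtraction for numerical stability is omitted here).
    let mut weights = scores.mapv(f64::exp);
    let sums = weights.sum_axis(Axis(1)).insert_axis(Axis(1));
    weights = &weights / &sums;
    // A configured dropout layer would randomly zero a fraction of `weights` here,
    // during training only (see the dropout sketch earlier in this diff).
    weights.dot(v)
}

fn main() {
    let q = Array2::<f64>::ones((2, 4));
    let k = Array2::<f64>::ones((3, 4));
    let v = Array2::<f64>::ones((3, 8));
    let out = attention_sketch(&q, &k, &v, None);
    assert_eq!(out.dim(), (2, 8));
}
```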
12 changes: 6 additions & 6 deletions models/transformers/src/attention/mod.rs
@@ -9,10 +9,10 @@
//! the Transformer model, primarily due to its capabilities in natural language
//! processing (NLP) domains
pub(crate) use self::_impl_methods::*;
pub use self::head::AttentionHead;
pub use self::score::Score;
pub use self::{config::AttentionConfig, head::AttentionHead, score::Score};
pub use self::utils::*;

pub(crate) mod config;
pub(crate) mod head;
pub(crate) mod score;

@@ -34,7 +34,7 @@ pub trait Attention {

pub(crate) mod utils {
use super::Score;
use concision::nn::DropoutLayer;
use concision::nn::Dropout;
use nd::linalg::Dot;
use nd::prelude::*;
use num::complex::ComplexFloat;
@@ -45,7 +45,7 @@ pub(crate) mod utils {
k: &ArrayBase<S, D>,
v: &ArrayBase<S, D>,
mask: Option<&Array<bool, D>>,
dropout: Option<&DropoutLayer>,
dropout: Option<&Dropout>,
) -> Score<A, D>
where
A: ComplexFloat + nd::ScalarOperand,
@@ -60,7 +60,7 @@

mod _impl_methods {
use super::Score;
use concision::prelude::{DropoutLayer, MaskFill, Softmax};
use concision::prelude::{Dropout, MaskFill, Softmax};
use nd::linalg::Dot;
use nd::prelude::*;
use num::complex::ComplexFloat;
@@ -70,7 +70,7 @@ mod _impl_methods {
k: &ArrayBase<S, D>,
v: &ArrayBase<S, D>,
mask: Option<&Array<bool, D>>,
dropout: Option<&DropoutLayer>,
dropout: Option<&Dropout>,
) -> Score<A, D>
where
A: ComplexFloat + nd::ScalarOperand,
5 changes: 2 additions & 3 deletions models/transformers/src/attention/multi/mod.rs
@@ -5,12 +5,11 @@
//! # Multi-Head Attention
//!
//!
pub use self::{config::Config, multi_head::*};
pub use self::multi_head::*;

pub(crate) mod config;
// pub(crate) mod config;
pub(crate) mod multi_head;

pub(crate) mod prelude {
pub use super::config::Config as MultiHeadAttentionConfig;
pub use super::multi_head::MultiHeadAttention;
}
11 changes: 5 additions & 6 deletions models/transformers/src/attention/multi/multi_head.rs
@@ -2,8 +2,7 @@
Appellation: multi_head <module>
Contrib: FL03 <jo3mccain@icloud.com>
*/
use super::Config;
use crate::AttentionHead;
use crate::{attention::AttentionConfig, AttentionHead};
use linear::{Biased, Linear};
use nd::prelude::*;
use nd::{DataOwned, OwnedRepr, RawData};
@@ -13,7 +12,7 @@ where
D: Dimension,
S: RawData<Elem = A>,
{
pub(crate) config: Config,
pub(crate) config: AttentionConfig,
pub(crate) head: AttentionHead<A, D, S>,
pub(crate) linears: Vec<Linear<A, Biased, D, S>>,
}
@@ -23,7 +22,7 @@ where
D: Dimension,
S: RawData<Elem = A>,
{
pub const fn config(&self) -> &Config {
pub const fn config(&self) -> &AttentionConfig {
&self.config
}

@@ -49,7 +48,7 @@
A: Clone + Default,
S: DataOwned,
{
let config = Config::new().d_model(d_model).heads(heads).build();
let config = AttentionConfig::new(d_model, heads);
let linears = (0..4)
.map(|_| Linear::from_features(d_model, d_model))
.collect();
@@ -69,7 +68,7 @@
{
fn default() -> Self {
Self {
config: Config::default(),
config: AttentionConfig::default(),
head: AttentionHead::default(),
linears: Vec::new(),
}
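As an aside on the four `Linear` layers created in `std` above: they correspond to the usual query, key, value, and output projections, each mapping d_model → d_model, and each projected tensor is then viewed as `heads` slices of width d_k = d_model / heads. A hypothetical shape-only sketch of that split (none of these names come from the crate):

```rust
use ndarray::{Array2, Array3};

// Shape bookkeeping only: (seq, d_model) -> (seq, heads, d_k).
// A real implementation would also permute the axes so that `heads` leads.
fn split_heads(x: Array2<f64>, heads: usize) -> Array3<f64> {
    let (seq, d_model) = x.dim();
    let dk = d_model / heads;
    x.into_shape((seq, heads, dk))
        .expect("d_model must be divisible by heads")
}

fn main() {
    let (seq, d_model, heads) = (10, 512, 8);
    let x = Array2::<f64>::zeros((seq, d_model));
    let xh = split_heads(x, heads);
    assert_eq!(xh.dim(), (10, 8, 64)); // 512 = 8 heads × 64
}
```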
1 change: 0 additions & 1 deletion models/transformers/src/attention/score.rs
@@ -33,7 +33,6 @@ where
pub fn into_score(self) -> Array<A, D> {
self.score
}

/// Retrieve the attention tensor.
pub fn attention(&self) -> &Array<A, D> {
&self.attention
46 changes: 46 additions & 0 deletions models/transformers/src/config/mod.rs
@@ -0,0 +1,46 @@
/*
Appellation: config <module>
Contrib: FL03 <jo3mccain@icloud.com>
*/


pub struct TransformerConfig {
pub heads: usize,
}

pub struct Features {
pub d_model: usize,
}

pub struct QkvShape {
pub dq: usize,
pub dk: usize,
pub dv: usize,
}

impl QkvShape {
pub fn new(dq: usize, dk: usize, dv: usize) -> Self {
Self {
dq,
dk,
dv,
}
}

pub fn std(dk: usize) -> Self {
let (dq, dv) = (dk, dk);

Self::new(dq, dk, dv)
}
}


pub struct EmbedConfig {}

pub struct FFNConfig {}
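One note on `QkvShape::std`: tying `dq = dv = dk` matches the usual convention — the query and key widths must agree for Q·K^T to be defined at all, and the value width is conventionally set to the same size. A small, self-contained illustration of the d_q = d_k constraint (names here are illustrative, not the crate's API):

```rust
use ndarray::Array2;

fn main() {
    // Q: (n, d_q), K: (m, d_k). Q · K^T is only well-defined when d_q == d_k,
    // which is why the "standard" shape constructor ties the two together.
    let (n, m, d) = (4, 6, 64);
    let q = Array2::<f64>::zeros((n, d));
    let k = Array2::<f64>::zeros((m, d));
    let scores = q.dot(&k.t()); // (n, m) attention score matrix
    assert_eq!(scores.dim(), (4, 6));
}
```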
7 changes: 0 additions & 7 deletions models/transformers/src/ffn/mod.rs

This file was deleted.
