From 3bfd0684c0a41b23a41723daf71627ea1b59a52a Mon Sep 17 00:00:00 2001 From: Joe McCain III Date: Sat, 25 May 2024 09:59:30 -0500 Subject: [PATCH] update Signed-off-by: Joe McCain III --- core/src/nn/dropout.rs | 25 ++++++---- core/tests/nn.rs | 4 +- .../src/attention/{multi => }/config.rs | 18 +++++--- models/transformers/src/attention/head.rs | 23 +++++----- models/transformers/src/attention/mod.rs | 12 ++--- .../transformers/src/attention/multi/mod.rs | 5 +- .../src/attention/multi/multi_head.rs | 11 ++--- models/transformers/src/attention/score.rs | 1 - models/transformers/src/config/mod.rs | 46 +++++++++++++++++++ models/transformers/src/ffn/mod.rs | 7 --- models/transformers/src/lib.rs | 11 ++--- .../src/{ffn/model.rs => model/ffn.rs} | 34 ++++++++------ models/transformers/src/model/mod.rs | 1 + models/transformers/src/model/sublayer.rs | 8 ++-- models/transformers/src/params/mod.rs | 8 ++-- .../src/params/{store.rs => qkv.rs} | 10 +++- models/transformers/tests/ffn.rs | 4 +- models/transformers/tests/ops.rs | 2 +- 18 files changed, 145 insertions(+), 85 deletions(-) rename models/transformers/src/attention/{multi => }/config.rs (66%) create mode 100644 models/transformers/src/config/mod.rs delete mode 100644 models/transformers/src/ffn/mod.rs rename models/transformers/src/{ffn/model.rs => model/ffn.rs} (63%) rename models/transformers/src/params/{store.rs => qkv.rs} (93%) diff --git a/core/src/nn/dropout.rs b/core/src/nn/dropout.rs index 19acdbc..772d2fa 100644 --- a/core/src/nn/dropout.rs +++ b/core/src/nn/dropout.rs @@ -11,7 +11,7 @@ use ndrand::{rand_distr::Bernoulli, RandomExt}; use num::traits::Num; #[cfg(feature = "rand")] -pub fn dropout(array: &ArrayBase, p: f64) -> Array +pub(crate) fn _dropout(array: &ArrayBase, p: f64) -> Array where A: Num + ScalarOperand, D: Dimension, @@ -29,7 +29,7 @@ where } /// [Dropout] randomly zeroizes elements with a given probability (`p`). 
-pub trait Dropout { +pub trait DropOut { type Output; fn dropout(&self, p: f64) -> Self::Output; @@ -44,7 +44,7 @@ pub trait Dropout { /// - (p) Probability of dropping an element #[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] -pub struct DropoutLayer { +pub struct Dropout { pub(crate) p: f64, } @@ -52,7 +52,7 @@ pub struct DropoutLayer { ************* Implementations ************* */ #[cfg(feature = "rand")] -impl Dropout for ArrayBase +impl DropOut for ArrayBase where A: Num + ScalarOperand, D: Dimension, @@ -61,28 +61,37 @@ where type Output = Array; fn dropout(&self, p: f64) -> Self::Output { - dropout(self, p) + _dropout(self, p) } } -impl DropoutLayer { +impl Dropout { pub fn new(p: f64) -> Self { Self { p } } + pub fn apply(&self, input: &ArrayBase) -> Array + where + A: Num + ScalarOperand, + D: Dimension, + S: DataOwned, + { + _dropout(input, self.p) + } + pub fn scale(&self) -> f64 { (1f64 - self.p).recip() } } -impl Default for DropoutLayer { +impl Default for Dropout { fn default() -> Self { Self::new(0.5) } } #[cfg(feature = "rand")] -impl Forward> for DropoutLayer +impl Forward> for Dropout where A: Num + ScalarOperand, D: Dimension, diff --git a/core/tests/nn.rs b/core/tests/nn.rs index 55b5119..e6ba7e5 100644 --- a/core/tests/nn.rs +++ b/core/tests/nn.rs @@ -1,7 +1,7 @@ #![allow(unused_imports)] extern crate concision_core as concision; -use concision::nn::DropoutLayer; +use concision::nn::Dropout; use concision::Forward; use ndarray::prelude::*; @@ -10,7 +10,7 @@ use ndarray::prelude::*; fn test_dropout() { let shape = (512, 2048); let arr = Array2::::ones(shape); - let dropout = DropoutLayer::new(0.5); + let dropout = Dropout::new(0.5); let out = dropout.forward(&arr); assert!(arr.iter().all(|&x| x == 1.0)); diff --git a/models/transformers/src/attention/multi/config.rs b/models/transformers/src/attention/config.rs similarity index 66% rename from models/transformers/src/attention/multi/config.rs rename to models/transformers/src/attention/config.rs index 58c510c..18b7820 100644 --- a/models/transformers/src/attention/multi/config.rs +++ b/models/transformers/src/attention/config.rs @@ -9,13 +9,17 @@ pub(crate) fn dk(d_model: usize, heads: usize) -> usize { #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] -pub struct Config { - pub d_model: usize, - pub heads: usize, +pub struct AttentionConfig { + pub d_model: usize, // embedding size; default is 512 + pub heads: usize, // number of heads; default is 8 } -impl Config { - pub fn new() -> ConfigBuilder { +impl AttentionConfig { + pub fn new(d_model: usize, heads: usize) -> Self { + Self { d_model, heads } + } + /// + pub fn create() -> ConfigBuilder { ConfigBuilder::new() } @@ -32,7 +36,7 @@ impl Config { } } -impl Default for Config { +impl Default for AttentionConfig { fn default() -> Self { Self { d_model: crate::D_MODEL, @@ -42,7 +46,7 @@ impl Default for Config { } concision::builder! 
{ - ConfigBuilder(Config) { + ConfigBuilder(AttentionConfig) { d_model: usize, heads: usize, } diff --git a/models/transformers/src/attention/head.rs b/models/transformers/src/attention/head.rs index e80fdda..76e69f0 100644 --- a/models/transformers/src/attention/head.rs +++ b/models/transformers/src/attention/head.rs @@ -5,7 +5,7 @@ use super::{Score, _attention}; use crate::params::QkvBase; use concision::getters; -use concision::nn::DropoutLayer; +use concision::nn::Dropout; use nd::linalg::Dot; use nd::*; use num::complex::ComplexFloat; @@ -16,13 +16,14 @@ use num::complex::ComplexFloat; /// be flexible, relying upon the n-dimensional [QkvBase] to store the query, key, and value tensors. /// More so, the head may be configured with an optional dropout and/or masking layers. /// -/// ### Dropout +/// ### `dropout` /// -/// The [DropoutLayer] is an optional layer applied after the softmax function is applied to the -/// score. The layer is used to prevent overfitting by randomly setting a fraction of the input +/// The [Dropout] layer is an optional, conditionally enabled layer (required the `rand` feature). +/// If enabled, the dropout layer is invoked after the softmax function is applied to the score. +/// The layer is used to prevent overfitting by randomly setting a fraction of the input /// units to zero at each update during training time. /// -/// ### Masking +/// ### `mask` /// /// After computing the dot-product of the query and key tensors, an optional mask may be applied to /// the attention score. The mask is used to prevent the model from attending to certain parts of the @@ -34,7 +35,7 @@ where S: RawData, { #[cfg(feature = "rand")] - pub(crate) dropout: Option, + pub(crate) dropout: Option, pub(crate) mask: Option>, pub(crate) params: QkvBase, } @@ -48,7 +49,7 @@ where A: Default, S: DataOwned, { - Self::from_params(QkvBase::new((dk, dm))) + Self::from_params(QkvBase::std(dk, dm)) } } @@ -115,7 +116,7 @@ where } /// Sets the dropout layer for the [AttentionHead] #[cfg(feature = "rand")] - pub fn set_dropout(&mut self, dropout: Option) { + pub fn set_dropout(&mut self, dropout: Option) { self.dropout = dropout; } /// Sets the mask for the [AttentionHead] @@ -124,7 +125,7 @@ where } /// Configure the [AttentionHead] with a [DropoutLayer] #[cfg(feature = "rand")] - pub fn with_dropout(self, dropout: DropoutLayer) -> Self { + pub fn with_dropout(self, dropout: Dropout) -> Self { Self { dropout: Some(dropout), ..self @@ -153,7 +154,7 @@ where /// Returns an immutable reference to the, optional, [dropout](DropoutLayer) layer. /// With the `rand` feature flag disabled, the dropout layer is /// unavailable and returns `None`. - pub fn dropout(&self) -> Option<&DropoutLayer> { + pub fn dropout(&self) -> Option<&Dropout> { self.dropout.as_ref() } } @@ -168,7 +169,7 @@ where /// With the `rand` feature flag disabled, the dropout layer is /// unavailable and returns `None`. #[cfg(not(feature = "rand"))] - pub fn dropout(&self) -> Option<&DropoutLayer> { + pub fn dropout(&self) -> Option<&Dropout> { None } } diff --git a/models/transformers/src/attention/mod.rs b/models/transformers/src/attention/mod.rs index a500b5f..2f4c729 100644 --- a/models/transformers/src/attention/mod.rs +++ b/models/transformers/src/attention/mod.rs @@ -9,10 +9,10 @@ //! the Transformer model, primarily due to its capabilities in natural language //! 
processing (NLP) domains pub(crate) use self::_impl_methods::*; -pub use self::head::AttentionHead; -pub use self::score::Score; +pub use self::{config::AttentionConfig, head::AttentionHead, score::Score}; pub use self::utils::*; +pub(crate) mod config; pub(crate) mod head; pub(crate) mod score; @@ -34,7 +34,7 @@ pub trait Attention { pub(crate) mod utils { use super::Score; - use concision::nn::DropoutLayer; + use concision::nn::Dropout; use nd::linalg::Dot; use nd::prelude::*; use num::complex::ComplexFloat; @@ -45,7 +45,7 @@ pub(crate) mod utils { k: &ArrayBase, v: &ArrayBase, mask: Option<&Array>, - dropout: Option<&DropoutLayer>, + dropout: Option<&Dropout>, ) -> Score where A: ComplexFloat + nd::ScalarOperand, @@ -60,7 +60,7 @@ pub(crate) mod utils { mod _impl_methods { use super::Score; - use concision::prelude::{DropoutLayer, MaskFill, Softmax}; + use concision::prelude::{Dropout, MaskFill, Softmax}; use nd::linalg::Dot; use nd::prelude::*; use num::complex::ComplexFloat; @@ -70,7 +70,7 @@ mod _impl_methods { k: &ArrayBase, v: &ArrayBase, mask: Option<&Array>, - dropout: Option<&DropoutLayer>, + dropout: Option<&Dropout>, ) -> Score where A: ComplexFloat + nd::ScalarOperand, diff --git a/models/transformers/src/attention/multi/mod.rs b/models/transformers/src/attention/multi/mod.rs index e101f03..4125126 100644 --- a/models/transformers/src/attention/multi/mod.rs +++ b/models/transformers/src/attention/multi/mod.rs @@ -5,12 +5,11 @@ //! # Multi-Head Attention //! //! -pub use self::{config::Config, multi_head::*}; +pub use self::multi_head::*; -pub(crate) mod config; +// pub(crate) mod config; pub(crate) mod multi_head; pub(crate) mod prelude { - pub use super::config::Config as MultiHeadAttentionConfig; pub use super::multi_head::MultiHeadAttention; } diff --git a/models/transformers/src/attention/multi/multi_head.rs b/models/transformers/src/attention/multi/multi_head.rs index 36a4051..f6baa7c 100644 --- a/models/transformers/src/attention/multi/multi_head.rs +++ b/models/transformers/src/attention/multi/multi_head.rs @@ -2,8 +2,7 @@ Appellation: multi_head Contrib: FL03 */ -use super::Config; -use crate::AttentionHead; +use crate::{attention::AttentionConfig, AttentionHead}; use linear::{Biased, Linear}; use nd::prelude::*; use nd::{DataOwned, OwnedRepr, RawData}; @@ -13,7 +12,7 @@ where D: Dimension, S: RawData, { - pub(crate) config: Config, + pub(crate) config: AttentionConfig, pub(crate) head: AttentionHead, pub(crate) linears: Vec>, } @@ -23,7 +22,7 @@ where D: Dimension, S: RawData, { - pub const fn config(&self) -> &Config { + pub const fn config(&self) -> &AttentionConfig { &self.config } @@ -49,7 +48,7 @@ where A: Clone + Default, S: DataOwned, { - let config = Config::new().d_model(d_model).heads(heads).build(); + let config = AttentionConfig::new(d_model, heads); let linears = (0..4) .map(|_| Linear::from_features(d_model, d_model)) .collect(); @@ -69,7 +68,7 @@ where { fn default() -> Self { Self { - config: Config::default(), + config: AttentionConfig::default(), head: AttentionHead::default(), linears: Vec::new(), } diff --git a/models/transformers/src/attention/score.rs b/models/transformers/src/attention/score.rs index 3e1df96..5451f7e 100644 --- a/models/transformers/src/attention/score.rs +++ b/models/transformers/src/attention/score.rs @@ -33,7 +33,6 @@ where pub fn into_score(self) -> Array { self.score } - /// Retrieve the attention tensor. 
pub fn attention(&self) -> &Array { &self.attention diff --git a/models/transformers/src/config/mod.rs b/models/transformers/src/config/mod.rs new file mode 100644 index 0000000..b052876 --- /dev/null +++ b/models/transformers/src/config/mod.rs @@ -0,0 +1,46 @@ +/* + Appellation: config + Contrib: FL03 +*/ + + +pub struct TransformerConfig { + pub heads: usize, +} + +pub struct Features { + + pub d_model: usize, + +} + +pub struct QkvShape { + pub dq: usize, + pub dk: usize, + pub dv: usize, +} + +impl QkvShape { + pub fn new(dq: usize, dk: usize, dv: usize) -> Self { + Self { + dq, + dk, + dv, + } + } + + pub fn std(dk: usize) -> Self { + let (dq, dv) = (dk, dk); + + Self::new(dq, dk, dv) + } +} + + +pub struct EmbedConfig { + +} + +pub struct FFNConfig { + +} \ No newline at end of file diff --git a/models/transformers/src/ffn/mod.rs b/models/transformers/src/ffn/mod.rs deleted file mode 100644 index 2ce65f8..0000000 --- a/models/transformers/src/ffn/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -/* - Appellation: model - Contrib: FL03 -*/ -pub use self::model::FeedForwardNetwork; - -pub(crate) mod model; diff --git a/models/transformers/src/lib.rs b/models/transformers/src/lib.rs index 6dae5eb..a6439f8 100644 --- a/models/transformers/src/lib.rs +++ b/models/transformers/src/lib.rs @@ -18,13 +18,7 @@ extern crate concision_linear as linear; extern crate ndarray as nd; #[doc(inline)] -pub use self::attention::prelude::{ - scaled_dot_product_attention, AttentionHead, MultiHeadAttention, -}; -#[doc(inline)] -pub use self::transformer::Transformer; -#[doc(inline)] -pub use self::{params::*, primitives::*}; +pub use self::{attention::prelude::*, params::prelude::*, ops::prelude::*, primitives::*, transformer::Transformer}; #[macro_use] pub(crate) mod macros; @@ -33,7 +27,7 @@ pub(crate) mod transformer; pub mod attention; pub mod codec; -pub mod ffn; +pub mod config; pub mod model; pub mod ops; pub mod params; @@ -46,5 +40,6 @@ mod impls { pub mod prelude { pub use super::attention::prelude::*; + pub use super::params::prelude::*; pub use super::Transformer; } diff --git a/models/transformers/src/ffn/model.rs b/models/transformers/src/model/ffn.rs similarity index 63% rename from models/transformers/src/ffn/model.rs rename to models/transformers/src/model/ffn.rs index 75f5803..00a29ab 100644 --- a/models/transformers/src/ffn/model.rs +++ b/models/transformers/src/model/ffn.rs @@ -2,32 +2,40 @@ Appellation: model Contrib: FL03 */ -use concision::prelude::{DropoutLayer, Forward, Predict, PredictError, ReLU}; +use concision::prelude::{Dropout, Forward, Predict, PredictError, ReLU}; use linear::{Biased, Linear, ParamMode}; use nd::prelude::*; use nd::{RemoveAxis, ScalarOperand}; use num::traits::Num; -// -pub struct FeedForwardNetwork +// #84: FeedForwardNetwork +/// A piecewise, feed-forward neural network consisting of two [Linear] layers with a ReLU activation function +/// optionally (and conditionally) supporting an [Dropout] layer. 
+/// +/// ### Shape +/// +/// - d_model: Embedding size +/// - d_ff: upward projection +/// +pub struct FeedForwardNetwork where D: Dimension, { #[cfg(feature = "rand")] - pub(crate) dropout: Option, + pub(crate) dropout: Option, pub(crate) input: Linear, pub(crate) output: Linear, } -impl FeedForwardNetwork +impl FeedForwardNetwork where K: ParamMode, { - pub fn new(d_model: usize, features: usize, dropout: Option) -> Self + pub fn std(d_model: usize, features: usize, dropout: Option) -> Self where A: Clone + Default, { - let dropout = dropout.map(|p| DropoutLayer::new(p)); + let dropout = dropout.map(|p| Dropout::new(p)); let input = Linear::from_features(d_model, features); let output = Linear::from_features(features, d_model); Self { @@ -38,7 +46,7 @@ where } } -impl FeedForwardNetwork +impl FeedForwardNetwork where D: Dimension, { @@ -52,26 +60,26 @@ where } #[cfg(feature = "rand")] -impl FeedForwardNetwork +impl FeedForwardNetwork where D: Dimension, { - pub fn dropout(&self) -> Option<&DropoutLayer> { + pub fn dropout(&self) -> Option<&Dropout> { self.dropout.as_ref() } } #[cfg(not(feature = "rand"))] -impl FeedForwardNetwork +impl FeedForwardNetwork where D: Dimension, { - pub fn dropout(&self) -> Option<&DropoutLayer> { + pub fn dropout(&self) -> Option<&Dropout> { None } } -impl Predict> for FeedForwardNetwork +impl Predict> for FeedForwardNetwork where B: Num + PartialOrd + ScalarOperand, D: RemoveAxis, diff --git a/models/transformers/src/model/mod.rs b/models/transformers/src/model/mod.rs index ac227da..35cce20 100644 --- a/models/transformers/src/model/mod.rs +++ b/models/transformers/src/model/mod.rs @@ -3,4 +3,5 @@ Contrib: FL03 */ +pub mod ffn; pub mod sublayer; diff --git a/models/transformers/src/model/sublayer.rs b/models/transformers/src/model/sublayer.rs index a1a5fbe..f6fbb6c 100644 --- a/models/transformers/src/model/sublayer.rs +++ b/models/transformers/src/model/sublayer.rs @@ -3,7 +3,7 @@ Contrib: FL03 */ #![cfg(feature = "rand")] -use concision::nn::DropoutLayer; +use concision::nn::Dropout; use concision::Forward; use linear::{Biased, LayerNorm, ParamMode, Unbiased}; use nd::prelude::*; @@ -16,7 +16,7 @@ pub struct Sublayer where D: Dimension, { - pub(crate) dropout: DropoutLayer, + pub(crate) dropout: Dropout, pub(crate) norm: LayerNorm, } @@ -31,12 +31,12 @@ where Sh: ShapeBuilder, { Self { - dropout: DropoutLayer::new(dropout), + dropout: Dropout::new(dropout), norm: LayerNorm::new(shape), } } - pub fn dropout(&self) -> &DropoutLayer { + pub fn dropout(&self) -> &Dropout { &self.dropout } diff --git a/models/transformers/src/params/mod.rs b/models/transformers/src/params/mod.rs index ba79e10..ba0136f 100644 --- a/models/transformers/src/params/mod.rs +++ b/models/transformers/src/params/mod.rs @@ -2,9 +2,9 @@ Appellation: params Contrib: FL03 */ -pub use self::{item::*, store::QkvBase}; +pub use self::{item::*, qkv::QkvBase}; -mod store; +mod qkv; pub mod item; @@ -31,7 +31,7 @@ params_ty!( #[allow(unused_imports)] pub(crate) mod prelude { - pub use super::item::{Entry, QKV}; - pub use super::store::QkvBase; + pub use super::item::QKV; + pub use super::qkv::QkvBase; pub use super::{ArcQkv, Qkv, ViewQkv}; } diff --git a/models/transformers/src/params/store.rs b/models/transformers/src/params/qkv.rs similarity index 93% rename from models/transformers/src/params/store.rs rename to models/transformers/src/params/qkv.rs index f59ee6e..d35298e 100644 --- a/models/transformers/src/params/store.rs +++ b/models/transformers/src/params/qkv.rs @@ -3,7 +3,7 @@ Contrib: 
FL03 */ use crate::attention::{Score, _attention}; -use concision::nn::DropoutLayer; +use concision::nn::Dropout; use concision::{dimensional, getters}; use nd::linalg::Dot; use nd::*; @@ -88,6 +88,12 @@ where qkv_view!(view_mut::<'a, ViewRepr>(&mut self) where S: DataMut); } +impl QkvBase where S: RawData { + pub fn std(dk: usize, d_model: usize) -> Self where A: Default, S: DataOwned { + Self::new((dk, d_model)) + } +} + #[cfg(not(feature = "rand"))] impl QkvBase where @@ -123,7 +129,7 @@ where ArrayBase: for<'a> Dot, Output = Array>, Array: Dot, Output = Array>, { - let dropout = dropout.map(DropoutLayer::new); + let dropout = dropout.map(Dropout::new); let (q, k, v) = self.qkv(); _attention(q, k, v, mask, dropout.as_ref()) } diff --git a/models/transformers/tests/ffn.rs b/models/transformers/tests/ffn.rs index 3bf9590..ad37ab8 100644 --- a/models/transformers/tests/ffn.rs +++ b/models/transformers/tests/ffn.rs @@ -8,14 +8,14 @@ extern crate concision_transformer as transformer; use concision::prelude::{linarr, Predict}; use linear::Biased; -use transformer::ffn::FeedForwardNetwork; +use transformer::model::ffn::FeedForwardNetwork; use ndarray::prelude::*; #[test] fn test_ffn() { let (samples, d_model, d_ff) = (100, 30, 3); - let model = FeedForwardNetwork::::new(d_model, d_ff, Some(0.1)); + let model = FeedForwardNetwork::::std(d_model, d_ff, Some(0.1)); let data = linarr::((samples, d_model)).unwrap(); diff --git a/models/transformers/tests/ops.rs b/models/transformers/tests/ops.rs index f8407de..e9b5d0c 100644 --- a/models/transformers/tests/ops.rs +++ b/models/transformers/tests/ops.rs @@ -18,7 +18,7 @@ fn test_merge() { let shape = (3, 4, 5); let dout = (4, 15); let arr = linarr::(shape.clone()).unwrap(); - let a = arr.clone().merge().unwrap(); + let a = arr.merge().unwrap(); assert_eq!(a.dim(), dout); assert_eq!(a, utils::merge3(&arr).unwrap());
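Usage sketch (not part of the patch): the updated tests above exercise the renamed items only in passing, so the snippet below pulls them together — `DropoutLayer` renamed to `Dropout`, `transformer::ffn::FeedForwardNetwork` moved to `transformer::model::ffn` with its constructor renamed from `new` to `std`. The crate paths and method names follow the tests in this diff; the concrete type parameters (`f64`, `Biased`, `Ix2`) and the exact `Predict::predict` signature are assumptions, since the diff does not show them.

// Sketch of the post-rename API, under the assumptions stated above.
extern crate concision_core as concision;
extern crate concision_linear as linear;
extern crate concision_transformer as transformer;

use concision::nn::Dropout;                      // formerly `DropoutLayer`
use concision::prelude::{linarr, Forward, Predict};
use linear::Biased;
use ndarray::prelude::*;
use transformer::model::ffn::FeedForwardNetwork; // formerly `transformer::ffn::FeedForwardNetwork`

fn main() {
    // `Dropout` keeps the old constructor; only the type name changed.
    let dropout = Dropout::new(0.5);
    let ones = Array2::<f64>::ones((8, 16));
    // Roughly half of the entries are zeroed at random; the shape is preserved.
    let masked = dropout.forward(&ones);
    assert_eq!(masked.dim(), (8, 16));

    // The feed-forward block now lives under `model::ffn` and is built with `std`.
    // Type arguments `<f64, Biased>` mirror the imports in tests/ffn.rs but are assumed.
    let (samples, d_model, d_ff) = (16, 32, 64);
    let ffn = FeedForwardNetwork::<f64, Biased>::std(d_model, d_ff, Some(0.1));
    let data = linarr::<f64, Ix2>((samples, d_model)).unwrap();
    // Assumed: `predict` returns a Result whose Ok value has shape (samples, d_model).
    let output = ffn.predict(&data).unwrap();
    assert_eq!(output.dim(), (samples, d_model));
}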