diff --git a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/Environment.py b/Scripts/Environment.py
similarity index 100%
rename from Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/Environment.py
rename to Scripts/Environment.py
diff --git a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/DeclanParams.py b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/DeclanParams.py
similarity index 84%
rename from Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/DeclanParams.py
rename to Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/DeclanParams.py
index 7209121c186..c2dbbbf8180 100644
--- a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/DeclanParams.py
+++ b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/DeclanParams.py
@@ -39,10 +39,10 @@ def calc_prob(em_preds, test_ys):
# Names:
name = "EGO Model CSW",
+ em_name = "EM",
state_input_layer_name = "STATE",
previous_state_layer_name = "PREVIOUS STATE",
context_layer_name = 'CONTEXT',
- em_name = "EM",
prediction_layer_name = "PREDICTION",
# Structural
@@ -50,10 +50,10 @@ def calc_prob(em_preds, test_ys):
previous_state_d = 11, # length of state vector
context_d = 11, # length of context vector
memory_capacity = ALL, # number of entries in EM memory; ALL=> match to number of stims
- # memory_init = (0,.0001), # Initialize memory with random values in interval
- memory_init = None, # Initialize with zeros
- concatenate_queries = False,
- # concatenate_queries = True,
+ memory_init = (0,.0001), # Initialize memory with random values in interval
+ # memory_init = None, # Initialize with zeros
+ # concatenate_queries = False,
+ concatenate_queries = True,
# environment
# curriculum_type = 'Interleaved',
@@ -63,20 +63,23 @@ def calc_prob(em_preds, test_ys):
# Processing
integration_rate = .69, # rate at which state is integrated into new context
- # state_weight = 1, # weight of the state used during memory retrieval
+ # state_weight = 1, # weight of the state used during memory retrieval
# context_weight = 1, # weight of the context used during memory retrieval
- state_weight = .5, # weight of the state used during memory retrieval
+ previous_state_weight = .5, # weight of the previous state used during memory retrieval
context_weight = .5, # weight of the context used during memory retrieval
+ state_weight = None, # weight of the state used during memory retrieval (None => not used as a key)
# normalize_field_weights = False, # whether to normalize the field weights during memory retrieval
normalize_field_weights = True, # whether to normalize the field weights during memory retrieval
+ normalize_memories = False, # whether to normalize the memory during memory retrieval
+ # normalize_memories = True, # whether to normalize the memory during memory retrieval
# softmax_temperature = None, # temperature of the softmax used during memory retrieval (smaller means more argmax-like
softmax_temperature = .1, # temperature of the softmax used during memory retrieval (smaller means more argmax-like
# softmax_temperature = ADAPTIVE, # temperature of the softmax used during memory retrieval (smaller means more argmax-like
# softmax_temperature = CONTROL, # temperature of the softmax used during memory retrieval (smaller means more argmax-like
# softmax_threshold = None, # threshold used to mask out small values in softmax
softmax_threshold = .001, # threshold used to mask out small values in softmax
- enable_learning=[False, False, True], # Enable learning for PREDICTION (STATE) but not CONTEXT or PREVIOUS STATE
- learn_field_weights = False,
+ # target_fields=[True, False, False], # Enable learning for PREDICTION (STATE) but not CONTEXT or PREVIOUS STATE
+ enable_learning = True,
loss_spec = Loss.BINARY_CROSS_ENTROPY,
# loss_spec = Loss.MSE,
learning_rate = .5,
diff --git a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/EGO CSW Model (using RNN).py b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/EGO CSW Model (using RNN).py
similarity index 100%
rename from Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/EGO CSW Model (using RNN).py
rename to Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/EGO CSW Model (using RNN).py
diff --git a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/EGO CSW Model.py b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/EGO CSW Model.py
similarity index 91%
rename from Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/EGO CSW Model.py
rename to Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/EGO CSW Model.py
index 18d3ba419b0..52423c7dbbf 100644
--- a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/EGO CSW Model.py
+++ b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/EGO CSW Model.py
@@ -174,7 +174,7 @@
'CONTEXT',
'PREVIOUS STATE'],
start=0)
-state_retrieval_weight = 0
+state_retrieval_weight = None
RANDOM_WEIGHTS_INITIALIZATION=RandomMatrix(center=0.0, range=0.1) # Matrix spec used to initialize all Projections
if is_numeric_scalar(model_params['softmax_temperature']): # translate to gain of softmax retrieval function
@@ -194,7 +194,7 @@ def construct_model(model_name:str=model_params['name'],
state_size:int=model_params['state_d'],
# Previous state
- previous_state_input_name:str=model_params['previous_state_layer_name'],
+ previous_state_name:str=model_params['previous_state_layer_name'],
# Context representation (learned):
context_name:str=model_params['context_layer_name'],
@@ -205,12 +205,15 @@ def construct_model(model_name:str=model_params['name'],
em_name:str=model_params['em_name'],
retrieval_softmax_gain=retrieval_softmax_gain,
retrieval_softmax_threshold=model_params['softmax_threshold'],
- state_retrieval_weight:Union[float,int]=state_retrieval_weight,
- previous_state_retrieval_weight:Union[float,int]=model_params['state_weight'],
+ # state_retrieval_weight:Union[float,int]=state_retrieval_weight,
+ # previous_state_retrieval_weight:Union[float,int]=model_params['state_weight'],
+ state_retrieval_weight:Union[float,int]=model_params['state_weight'],
+ previous_state_retrieval_weight:Union[float,int]=model_params['previous_state_weight'],
context_retrieval_weight:Union[float,int]=model_params['context_weight'],
normalize_field_weights = model_params['normalize_field_weights'],
+ normalize_memories = model_params['normalize_memories'],
concatenate_queries = model_params['concatenate_queries'],
- learn_field_weights = model_params['learn_field_weights'],
+ enable_learning = model_params['enable_learning'],
memory_capacity = memory_capacity,
memory_init=model_params['memory_init'],
@@ -219,7 +222,7 @@ def construct_model(model_name:str=model_params['name'],
# Learning
loss_spec=model_params['loss_spec'],
- enable_learning=model_params['enable_learning'],
+ # target_fields=model_params['target_fields'],
learning_rate = model_params['learning_rate'],
device=model_params['device']
@@ -233,7 +236,7 @@ def construct_model(model_name:str=model_params['name'],
# ----------------------------------------------------------------------------------------------------------------
state_input_layer = ProcessingMechanism(name=state_input_name, input_shapes=state_size)
- previous_state_layer = ProcessingMechanism(name=previous_state_input_name, input_shapes=state_size)
+ previous_state_layer = ProcessingMechanism(name=previous_state_name, input_shapes=state_size)
# context_layer = ProcessingMechanism(name=context_name, input_shapes=context_size)
context_layer = TransferMechanism(name=context_name,
input_shapes=context_size,
@@ -241,6 +244,8 @@ def construct_model(model_name:str=model_params['name'],
integrator_mode=True,
integration_rate=integration_rate)
+
+
em = EMComposition(name=em_name,
memory_template=[[0] * state_size, # state
[0] * state_size, # previous state
@@ -250,6 +255,15 @@ def construct_model(model_name:str=model_params['name'],
memory_decay_rate=0,
softmax_gain=retrieval_softmax_gain,
softmax_threshold=retrieval_softmax_threshold,
+ fields = {state_input_name: {FIELD_WEIGHT: state_retrieval_weight,
+ LEARN_FIELD_WEIGHT: False,
+ TARGET_FIELD: True},
+ previous_state_name: {FIELD_WEIGHT: previous_state_retrieval_weight,
+ LEARN_FIELD_WEIGHT: False,
+ TARGET_FIELD: False},
+ context_name: {FIELD_WEIGHT: context_retrieval_weight,
+ LEARN_FIELD_WEIGHT: False,
+ TARGET_FIELD: False}},
# Input Nodes:
# field_names=[state_input_name,
# previous_state_input_name,
@@ -259,19 +273,20 @@ def construct_model(model_name:str=model_params['name'],
# previous_state_retrieval_weight,
# context_retrieval_weight
# ),
- field_names=[previous_state_input_name,
- context_name,
- state_input_name,
- ],
- field_weights=(previous_state_retrieval_weight,
- context_retrieval_weight,
- state_retrieval_weight,
- ),
+ # field_names=[previous_state_input_name,
+ # context_name,
+ # state_input_name,
+ # ],
+ # field_weights=(previous_state_retrieval_weight,
+ # context_retrieval_weight,
+ # state_retrieval_weight,
+ # ),
normalize_field_weights=normalize_field_weights,
+ normalize_memories=normalize_memories,
concatenate_queries=concatenate_queries,
- learn_field_weights=learn_field_weights,
- learning_rate=learning_rate,
enable_learning=enable_learning,
+ learning_rate=learning_rate,
+ # target_fields=target_fields,
device=device
)
@@ -311,7 +326,7 @@ def construct_model(model_name:str=model_params['name'],
em]
previous_state_to_em_pathway = [previous_state_layer,
MappingProjection(sender=previous_state_layer,
- receiver=em.nodes[previous_state_input_name+QUERY],
+ receiver=em.nodes[previous_state_name+QUERY],
matrix=IDENTITY_MATRIX,
learnable=False),
em]
diff --git a/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Environment.py b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Environment.py
new file mode 100644
index 00000000000..124de532c83
--- /dev/null
+++ b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Environment.py
@@ -0,0 +1,54 @@
+import numpy as np
+import torch
+from torch.utils.data import dataset
+from random import randint
+
+def one_hot_encode(labels, num_classes):
+ """
+ One hot encode labels and convert to tensor.
+ """
+ return torch.tensor((np.arange(num_classes) == labels[..., None]).astype(float), dtype=torch.float32)
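+
+# Illustrative example (not executed): one_hot_encode(np.array([2, 0]), 4) returns
+# tensor([[0., 0., 1., 0.],
+#         [1., 0., 0., 0.]])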
+
+class DeterministicCSWDataset(dataset.Dataset):
+ def __init__(self, n_samples_per_context, contexts_to_load) -> None:
+ super().__init__()
+ # Two contexts, each with two deterministic 5-state sequences;
+ # states 9 and 10 serve as the start states for contexts 0 and 1, respectively.
+ raw_xs = np.array([
+ [[9,1,3,5,7],[9,2,4,6,8]],
+ [[10,1,4,5,8],[10,2,3,6,7]]
+ ])
+
+ item_indices = np.random.choice(raw_xs.shape[1], sum(n_samples_per_context), replace=True)
+ task_names = [0, 1] # Flexible so these can be renamed later
+ task_indices = [task_names.index(name) for name in contexts_to_load]
+
+ context_indices = np.repeat(np.array(task_indices), n_samples_per_context)
+ self.xs = one_hot_encode(raw_xs[context_indices, item_indices], 11)
+
+ self.xs = self.xs.reshape((-1, 11))
+ # Targets are the next state in each sequence (padded with a one-hot state 0 at the end)
+ self.ys = torch.cat([self.xs[1:], one_hot_encode(np.array([0]), 11)], dim=0)
+ # Each sample contributes 5 states, so repeat each context label 5x
+ context_indices = np.repeat(np.array(task_indices), [x*5 for x in n_samples_per_context])
+ self.contexts = one_hot_encode(context_indices, len(task_names))
+
+ # Remove the last transition since there's no next state available
+ self.xs = self.xs[:-1]
+ self.ys = self.ys[:-1]
+ self.contexts = self.contexts[:-1]
+
+ def __len__(self):
+ return len(self.xs)
+
+ def __getitem__(self, idx):
+ return self.xs[idx], self.contexts[idx], self.ys[idx]
+
+def generate_dataset(condition='Blocked'):
+ # Generate the dataset for either the blocked or interleaved condition
+ if condition == 'Blocked':
+ # Four 40-trial blocks alternating between contexts, followed by 40 randomly ordered test trials
+ contexts_to_load = [0,1,0,1] + [randint(0,1) for _ in range(40)]
+ n_samples_per_context = [40,40,40,40] + [1]*40
+ elif condition == 'Interleaved':
+ contexts_to_load = [0,1]*80 + [randint(0,1) for _ in range(40)]
+ n_samples_per_context = [1]*160 + [1]*40
+ else:
+ raise ValueError(f'Unknown dataset condition: {condition}')
+
+ return DeterministicCSWDataset(n_samples_per_context, contexts_to_load)
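+
+# Minimal usage sketch (illustrative assumption: wrapping in a torch DataLoader; not used by the model scripts):
+#
+#     from torch.utils.data import DataLoader
+#     loader = DataLoader(generate_dataset('Blocked'), batch_size=1, shuffle=False)
+#     for state, context, next_state in loader:
+#         ...  # each item: a one-hot 11-d state, its one-hot context, and the next state as the target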
diff --git a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/Figures/EGO CSW Model (PyTorch).pdf b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Figures/EGO CSW Model (PyTorch).pdf
similarity index 100%
rename from Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/Figures/EGO CSW Model (PyTorch).pdf
rename to Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Figures/EGO CSW Model (PyTorch).pdf
diff --git a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/Figures/EGO CSW Model (basic).pdf b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Figures/EGO CSW Model (basic).pdf
similarity index 100%
rename from Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/Figures/EGO CSW Model (basic).pdf
rename to Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Figures/EGO CSW Model (basic).pdf
diff --git a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/Figures/EGO CSW Model (learning and store).pdf b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Figures/EGO CSW Model (learning and store).pdf
similarity index 100%
rename from Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/Figures/EGO CSW Model (learning and store).pdf
rename to Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Figures/EGO CSW Model (learning and store).pdf
diff --git a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/Figures/EGO CSW Model (learning).pdf b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Figures/EGO CSW Model (learning).pdf
similarity index 100%
rename from Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/Figures/EGO CSW Model (learning).pdf
rename to Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Figures/EGO CSW Model (learning).pdf
diff --git a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/Figures/EGO CSW Model - EM (with PNL learning).pdf b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Figures/EGO CSW Model - EM (with PNL learning).pdf
similarity index 100%
rename from Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/Figures/EGO CSW Model - EM (with PNL learning).pdf
rename to Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Figures/EGO CSW Model - EM (with PNL learning).pdf
diff --git a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/Figures/EGO CSW Model - EM.pdf b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Figures/EGO CSW Model - EM.pdf
similarity index 100%
rename from Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/Figures/EGO CSW Model - EM.pdf
rename to Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Figures/EGO CSW Model - EM.pdf
diff --git a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/Figures/EGO Paper Figure.jpg b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Figures/EGO Paper Figure.jpg
similarity index 100%
rename from Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/Figures/EGO Paper Figure.jpg
rename to Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Figures/EGO Paper Figure.jpg
diff --git a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/Figures/EMComposition (example BIG).pdf b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Figures/EMComposition (example BIG).pdf
similarity index 100%
rename from Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/Figures/EMComposition (example BIG).pdf
rename to Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/Figures/EMComposition (example BIG).pdf
diff --git a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/ScriptControl.py b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/ScriptControl.py
similarity index 93%
rename from Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/ScriptControl.py
rename to Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/ScriptControl.py
index 43016886d3a..f61ec5f75d4 100644
--- a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/ScriptControl.py
+++ b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/ScriptControl.py
@@ -3,8 +3,8 @@
# Settings for running script:
-MODEL_PARAMS = 'TestParams'
-# MODEL_PARAMS = 'DeclanParams'
+# MODEL_PARAMS = 'TestParams'
+MODEL_PARAMS = 'DeclanParams'
CONSTRUCT_MODEL = True # THIS MUST BE SET TO True to run the script
DISPLAY_MODEL = ( # Only one of the following can be uncommented:
@@ -13,7 +13,7 @@
# # 'show_pytorch': True, # show pytorch graph of model
# 'show_learning': True,
# # 'show_nested_args': {'show_learning': pnl.ALL},
- # 'show_projections_not_in_composition': True,
+ # # 'show_projections_not_in_composition': True,
# # 'show_nested': {'show_node_structure': True},
# # 'exclude_from_gradient_calc_style': 'dashed'# show target mechanisms for learning
# # 'show_node_structure': True # show detailed view of node structures and projections
diff --git a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/TestParams.py b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/TestParams.py
similarity index 86%
rename from Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/TestParams.py
rename to Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/TestParams.py
index e9893eff726..2ba7073f178 100644
--- a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/TestParams.py
+++ b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/TestParams.py
@@ -1,14 +1,16 @@
from psyneulink.core.llvm import ExecutionMode
from psyneulink.core.globals.keywords import ALL, ADAPTIVE, CONTROL, CPU, Loss, MPS, OPTIMIZATION_STEP, RUN, TRIAL
+
+
model_params = dict(
# Names:
name = "EGO Model CSW",
+ em_name = "EM",
state_input_layer_name = "STATE",
previous_state_layer_name = "PREVIOUS STATE",
context_layer_name = 'CONTEXT',
- em_name = "EM",
prediction_layer_name = "PREDICTION",
# Structural
@@ -20,7 +22,6 @@
# memory_init = None, # Initialize with zeros
concatenate_queries = False,
# concatenate_queries = True,
-
# environment
# curriculum_type = 'Interleaved',
curriculum_type = 'Blocked',
@@ -33,18 +34,19 @@
context_weight = 1, # weight of the context used during memory retrieval
# normalize_field_weights = False, # whether to normalize the field weights during memory retrieval
normalize_field_weights = True, # whether to normalize the field weights during memory retrieval
+ normalize_memories = False, # whether to normalize the memory during memory retrieval
# softmax_temperature = None, # temperature of the softmax used during memory retrieval (smaller means more argmax-like
softmax_temperature = .1, # temperature of the softmax used during memory retrieval (smaller means more argmax-like
# softmax_temperature = ADAPTIVE, # temperature of the softmax used during memory retrieval (smaller means more argmax-like
# softmax_temperature = CONTROL, # temperature of the softmax used during memory retrieval (smaller means more argmax-like
# softmax_threshold = None, # threshold used to mask out small values in softmax
softmax_threshold = .001, # threshold used to mask out small values in softmax
- enable_learning=[True, False, False], # Enable learning for PREDICTION (STATE) but not CONTEXT or PREVIOUS STATE
- # enable_learning=[True, True, True]
- # enable_learning=True,
- # enable_learning=False,
- learn_field_weights = True,
- # learn_field_weights = False,
+ # target_fields=[True, False, False], # Enable learning for PREDICTION (STATE) but not CONTEXT or PREVIOUS STATE
+ # target_fields=[True, True, True]
+ # target_fields=True,
+ # target_fields=False,
+ enable_learning = True,
+ # enable_learning = False,
loss_spec = Loss.BINARY_CROSS_ENTROPY,
# loss_spec = Loss.CROSS_ENTROPY,
# loss_spec = Loss.MSE,
@@ -53,8 +55,8 @@
synch_weights = RUN,
synch_values = RUN,
synch_results = RUN,
- execution_mode = ExecutionMode.Python,
- # execution_mode = ExecutionMode.PyTorch,
+ # execution_mode = ExecutionMode.Python,
+ execution_mode = ExecutionMode.PyTorch,
device = CPU,
# device = MPS,
)
diff --git a/Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/__init__.py b/Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/__init__.py
similarity index 100%
rename from Scripts/Models (Under Development)/EGO/Using EMComposition/CSW/__init__.py
rename to Scripts/Models (Under Development)/EGO/Using EMComposition/Coffee Shop World/__init__.py
diff --git a/docs/source/_static/EMComposition_Example_fig.svg b/docs/source/_static/EMComposition_Example_fig.svg
index f3a5662f21e..7456c5d2b38 100644
--- a/docs/source/_static/EMComposition_Example_fig.svg
+++ b/docs/source/_static/EMComposition_Example_fig.svg
@@ -1,56 +1,94 @@
-
-
-
+
+
\ No newline at end of file
diff --git a/docs/source/_static/EMComposition_field_weights_different.pdf b/docs/source/_static/EMComposition_field_weights_different.pdf
new file mode 100644
index 00000000000..97ebdb43148
Binary files /dev/null and b/docs/source/_static/EMComposition_field_weights_different.pdf differ
diff --git a/docs/source/_static/EMComposition_field_weights_different.svg b/docs/source/_static/EMComposition_field_weights_different.svg
index 94aab6b6a7c..eeb15badcc4 100644
--- a/docs/source/_static/EMComposition_field_weights_different.svg
+++ b/docs/source/_static/EMComposition_field_weights_different.svg
@@ -1,103 +1,209 @@
-
-
-
+
+
\ No newline at end of file
diff --git a/docs/source/_static/EMComposition_field_weights_equal_fig.svg b/docs/source/_static/EMComposition_field_weights_equal_fig.svg
index dfa96297ffb..a093260a155 100644
--- a/docs/source/_static/EMComposition_field_weights_equal_fig.svg
+++ b/docs/source/_static/EMComposition_field_weights_equal_fig.svg
@@ -1,104 +1,209 @@
-
-
-
+
+
\ No newline at end of file
diff --git a/psyneulink/core/components/functions/nonstateful/transformfunctions.py b/psyneulink/core/components/functions/nonstateful/transformfunctions.py
index 86c1db6b7b5..99733aad2bc 100644
--- a/psyneulink/core/components/functions/nonstateful/transformfunctions.py
+++ b/psyneulink/core/components/functions/nonstateful/transformfunctions.py
@@ -2216,7 +2216,7 @@ def _function(self,
elif operation == L0:
if normalize:
- normalization = np.sum(np.abs(vector - matrix))
+ normalization = np.sum(np.abs(vector - matrix)) or 1  # 'or 1' guards against division by zero when the total L0 distance is 0
result = np.sum((1 - (np.abs(vector - matrix)) / normalization),axis=0)
else:
result = np.sum((np.abs(vector - matrix)),axis=0)
diff --git a/psyneulink/core/components/ports/inputport.py b/psyneulink/core/components/ports/inputport.py
index ce123b7118c..8fdcb4a2501 100644
--- a/psyneulink/core/components/ports/inputport.py
+++ b/psyneulink/core/components/ports/inputport.py
@@ -713,7 +713,8 @@ class InputPort(Port_Base):
is executed and its variable is assigned None. If *default_input* is assigned *DEFAULT_VARIABLE*, then the
`default value ` for the InputPort's `variable ` is used as its value.
This is useful for assignment to a Mechanism that needs a constant (i.e., fixed value) as the input to its
- `function `.
+ `function ` (such as a `bias unit ` in an
+ `AutodiffComposition`).
.. note::
If `default_input ` is assigned *DEFAULT_VARIABLE*, then its `internal_only
diff --git a/psyneulink/core/compositions/composition.py b/psyneulink/core/compositions/composition.py
index 08f5e91e2bd..81b98a5aecd 100644
--- a/psyneulink/core/compositions/composition.py
+++ b/psyneulink/core/compositions/composition.py
@@ -3369,7 +3369,7 @@ class NodeRole(enum.Enum):
BIAS
A `Node ` for which one or more of its `InputPorts ` is assigned
*DEFAULT_VARIABLE* as its `default_input ` (which provides it a prespecified
- input that is constant across executions). Such a node can also be assigned as an `INPUT` and/or `ORIGIN`,
+ input that is constant across executions). Such a node can also be assigned as an `INPUT` and/or `ORIGIN`,
if it receives input from outside the Composition and/or does not receive any `Projections ` from
other Nodes within the Composition, respectively. This role cannot be modified programmatically.
diff --git a/psyneulink/library/components/mechanisms/modulatory/learning/EMstoragemechanism.py b/psyneulink/library/components/mechanisms/modulatory/learning/EMstoragemechanism.py
index f9d296eef87..fbd49f4d7a8 100644
--- a/psyneulink/library/components/mechanisms/modulatory/learning/EMstoragemechanism.py
+++ b/psyneulink/library/components/mechanisms/modulatory/learning/EMstoragemechanism.py
@@ -642,7 +642,7 @@ def _validate_params(self, request_set, target_set=None, context=None):
f"a list or 2d np.array containing entries that have the same shape "
f"({memory_matrix.shape}) as an entry (row) in 'memory_matrix' arg.")
- # Ensure the number of fields is equal to the numbder of items in variable
+ # Ensure the number of fields is equal to the number of items in variable
if FIELDS in request_set:
fields = request_set[FIELDS]
if len(fields) != len(self.variable):
diff --git a/psyneulink/library/compositions/autodiffcomposition.py b/psyneulink/library/compositions/autodiffcomposition.py
index 5ce5f1eb188..003b43db767 100644
--- a/psyneulink/library/compositions/autodiffcomposition.py
+++ b/psyneulink/library/compositions/autodiffcomposition.py
@@ -110,10 +110,17 @@
AutodiffComposition does not (currently) support the *automatic* construction of separate bias parameters.
Thus, when constructing a model using an AutodiffComposition that corresponds to one in PyTorch, the `bias
` parameter of PyTorch modules should be set
-to `False`. Trainable biases *can* be specified explicitly in an AutodiffComposition by including a
-TransferMechanism that projects to the relevant Mechanism (i.e., implementing that layer of the network to
-receive the biases) using a `MappingProjection` with a `matrix ` parameter that
-implements a diagnoal matrix with values corresponding to the initial value of the biases.
+to `False`.
+
+ .. hint::
+    Trainable biases *can* be specified explicitly in an AutodiffComposition by including a `ProcessingMechanism`
+    that projects to the relevant Mechanism (i.e., the one implementing the layer of the network that is to receive
+    the biases) using a `MappingProjection` with a `matrix ` parameter that implements a
+    diagonal matrix with values corresponding to the initial values of the biases. The `default_input
+    ` Parameter of one of the ProcessingMechanism's `input_ports
+    ` must be set to *DEFAULT_VARIABLE*, and its `default_variable `
+    set equal to 1. ProcessingMechanisms configured in this way are assigned the `NodeRole` `BIAS`, and the
+    MappingProjection is subject to learning.
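+
+    A rough sketch of this configuration (the names, layer size, and attribute-style assignment of `default_input
+    ` are illustrative assumptions, not a prescribed API)::
+
+        import numpy as np
+        from psyneulink import ProcessingMechanism, MappingProjection, DEFAULT_VARIABLE
+
+        n = 3                                                 # size of the layer receiving the biases
+        hidden = ProcessingMechanism(name='HIDDEN', input_shapes=n)
+        bias = ProcessingMechanism(name='BIAS', default_variable=np.ones(n))
+        bias.input_ports[0].default_input = DEFAULT_VARIABLE  # BIAS provides a constant input of 1s
+        bias_proj = MappingProjection(sender=bias, receiver=hidden,
+                                      matrix=np.diag(np.full(n, 0.1)))  # initial biases of 0.1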
.. _AutodiffComposition_Nesting:
@@ -951,8 +958,9 @@ def create_pathway(node)->list:
return pathways
- # Construct a pathway for each INPUT Node (except the TARGET Node)
- pathways = [pathway for node in self.get_nodes_by_role(NodeRole.INPUT)
+ # Construct a pathway for each INPUT Node (including BIAS Nodes, except the TARGET Node)
+ pathways = [pathway
+ for node in (self.get_nodes_by_role(NodeRole.INPUT) + self.get_nodes_by_role(NodeRole.BIAS))
if node not in self.get_nodes_by_role(NodeRole.TARGET)
for pathway in _get_pytorch_backprop_pathway(node)]
@@ -1055,8 +1063,7 @@ def _get_loss(self, loss_spec):
# and therefore requires a wrapper function to properly package inputs.
return lambda x, y: nn.CrossEntropyLoss()(torch.atleast_2d(x), torch.atleast_2d(y.type(x.type())))
elif loss_spec == Loss.BINARY_CROSS_ENTROPY:
- if version.parse(torch.version.__version__) >= version.parse('1.12.0'):
- return nn.BCELoss()
+ return nn.BCELoss()
elif loss_spec == Loss.L1:
return nn.L1Loss(reduction='sum')
elif loss_spec == Loss.NLL:
@@ -1118,7 +1125,7 @@ def autodiff_forward(self, inputs, targets,
trial_loss = 0
for i in range(len(curr_tensors_for_trained_outputs[component])):
trial_loss += self.loss_function(curr_tensors_for_trained_outputs[component][i],
- curr_target_tensors_for_trained_outputs[component][i])
+ curr_target_tensors_for_trained_outputs[component][i])
pytorch_rep.minibatch_loss += trial_loss
pytorch_rep.minibatch_loss_count += 1
diff --git a/psyneulink/library/compositions/emcomposition.py b/psyneulink/library/compositions/emcomposition.py
index af84a5b7685..b8506217601 100644
--- a/psyneulink/library/compositions/emcomposition.py
+++ b/psyneulink/library/compositions/emcomposition.py
@@ -7,242 +7,8 @@
# ********************************************* EMComposition *************************************************
-#
-# TODO:
-# - QUESTION:
-# - SHOULD differential of SoftmaxGainControl Node be included in learning?
-# - SHOULD MEMORY DECAY OCCUR IF STORAGE DOES NOT? CURRENTLY IT DOES NOT (SEE EMStorage Function)
-
-# - FIX: Refactor field_weights to use None instead of 0 to specify value fields, and allow inputs to field_nodes
-# - FIX: ALLOW SOFTMAX SPEC TO BE A DICT WITH PARAMETERS FOR _get_softmax_gain() FUNCTION
-# - FIX: Concatenation:
-# - LLVM for function and derivative
-# - Add Concatenate to pytorchcreator_function
-# - Deal with matrix assignment in LearningProjection LINE 643
-# - Reinstate test for execution of Concatenate with learning in test_emcomposition (currently commented out)
-# - FIX: Softmax Gain Control:
-# Test if it current works (they are added to Composition but not in BackProp processing pathway)
-# Does backprop have to run through this if not learnable?
-# If so, need to add PNL Function, with derivative and LLVM and Pytorch implementations
-# - FIX: WRITE MORE TESTS FOR EXECUTION, WARNINGS, AND ERROR MESSAGES
-# - learning (with and without learning field weights
-# - 3d tuple with first entry != memory_capacity if specified
-# - list with number of entries > memory_capacity if specified
-# - input is added to the correct row of the matrix for each key and value for
-# for non-contiguous keys (e.g, field_weights = [1,0,1]))
-# - illegal field weight assignment
-# - explicitly that storage occurs after retrieval
-# - FIX: WARNING NOT OCCURRING FOR Normalize ON ZEROS WITH MULTIPLE ENTRIES (HAPPENS IF *ANY* KEY IS EVER ALL ZEROS)
-# - FIX: IMPLEMENT LearningMechanism FOR RETRIEVAL WEIGHTS:
-# - what is learning_update: AFTER doing? Use for scheduling execution of storage_node?
-# ?? implement derivative for concatenate
-# - FIX: implement add_storage_pathway to handle addition of storage_node as learning mechanism
-# - in "_create_storage_learning_components()" assign "learning_update" arg
-# as BEORE OR DURING instead of AFTER (assigned to learning_enabled arg of LearningMechanism)
-# - FIX: Add StorageMechanism LearningProjections to Composition? -> CAUSES TEST FAILURES; NEEDS INVESTIGATION
-# - FIX: Thresholded version of SoftMax gain (per Kamesh)
-# - FIX: DEAL WITH INDEXING IN NAMES FOR NON-CONTIGUOUS KEYS AND VALUES (reorder to keep all keys together?)
-# - FIX: _import_composition:
-# - MOVE LearningProjections
-# - MOVE Condition? (e.g., AllHaveRun) (OR PUT ON MECHANISM?)
-# - FIX: IMPLEMENT _integrate_into_composition METHOD THAT CALLS _import_composition ON ANOTHER COMPOSITION
-# - AND TRANSFERS RELEVANT ATTRIBUTES (SUCH AS MEMORY, query_input_nodeS, ETC., POSSIBLY APPENDING NAMES)
-# - FIX: ADD Option to suppress field_weights when computing norm for weakest entry in EMStorageMechanism
-# - FIX: GENERATE ANIMATION w/ STORAGE (uses Learning but not in usual way)
-# - IMPLEMENT use OF multiple inheritance of EMComposition from AutoDiff and Composition
-
-# - FIX: DOCUMENTATION:
-# - enable_learning vs. learning_field_weights
-# - USE OF EMStore.storage_location (NONE => LOCAL, SPECIFIED => GLOBAL)
-# - define "keys" and "values" explicitly
-# - define "key weights" explicitly as field_weights for all non-zero values
-# - make it clear that full size of memory is initialized (rather than "filling up" w/ use)
-# - write examples for run()
-# - FIX: ADD NOISE
-# - FIX: ?ADD add_memory() METHOD FOR STORING W/O RETRIEVAL, OR JUST ADD retrieval_prob AS modulable Parameter
-# - FIX: CONFIDENCE COMPUTATION (USING SIGMOID ON DOT PRODUCTS) AND REPORT THAT (EVEN ON FIRST CALL)
-# MISC:
-# - WRITE TESTS FOR INPUT_PORT and MATRIX SPECS CORRECT IN LATEST BRANCHs
-# - ACCESSIBILITY OF DISTANCES (SEE BELOW): MAKE IT A LOGGABLE PARAMETER (I.E., WITH APPROPRIATE SETTER)
-# ADD COMPILED VERSION OF NORMED LINEAR_COMBINATION FUNCTION TO LinearCombination FUNCTION: dot / (norm a * norm b)
-# - DECAY WEIGHTS BY:
-# ? 1-SOFTMAX / N (WHERE N = NUMBER OF ITEMS IN MEMORY)
-# or
-# 1/N (where N=number of items in memory, and thus gets smaller as N gets
-# larger) on each storage (to some asymptotic minimum value), and store the new memory to the unit with the
-# smallest weights (randomly selected among “ties" [i.e., within epsilon of each other]), I think we have a
-# mechanism that can adaptively use its limited capacity as sensibly as possible, by re-cycling the units
-# that have the least used memories.
-# - MAKE "_store_memory" METHOD USE LEARNING INSTEAD OF ASSIGNMENT
-# - make LearningMechanism that, instead of error, simply adds relevant input to weights (with all others = 0)
-# - (relationship to Steven's Hebbian / DPP model?):
-
-# - ADD ADDITIONAL PARAMETERS FROM CONTENTADDRESSABLEMEMORY FUNCTION
-# - ADAPTIVE TEMPERATURE: KAMESH FOR FORMULA
-# - ADD MEMORY_DECAY TO ContentAddressableMemory FUNCTION (and compiled version by Samyak)
-# - MAKE memory_template A CONSTRUCTOR ARGUMENT FOR default_variable
-
-# - FIX: PSYNEULINK:
-# - TESTS:
-# - WRITE TESTS FOR DriftOnASphere variable = scalar, 2d vector or 1d vector of correct and incorrect lengths
-# - WRITE TESTS FOR LEARNING WITH LinearCombination of 1, 2 and 3 inputs
-#
-# - COMPILATION:
-# - Remove CIM projections on import to another composition
-# - Autodiff support for IdentityFunction
-# - MatrixTransform to add normalization
-# - _store() method to assign weights to memory
-# - LLVM problem with ComparatorMechanism
-#
-# - pytorchcreator_function:
-# SoftMax implementation: torch.nn.Softmax(dim=0) is not getting passed correctly
-# Implement LinearCombination
-# - MatrixTransform Function:
-#
-# - LEARNING - Backpropagation LearningFunction / LearningMechanism
-# - DOCUMENTATION:
-# - weight_change_matrix = gradient (result of delta rule) * learning_rate
-# - ERROR_SIGNAL is OPTIONAL (only implemented when there is an error_source specified)
-# - Backprop: (related to above?) handle call to constructor with default_variable = None
-# - WRITE TESTS FOR USE OF COVARIATES AND RELATED VIOLATIONS: (see ScratchPad)
-# - Use of LinearCombination with PRODUCT in output_source
-# - Use of LinearCombination with PRODUCT in InputPort of output_source
-# - Construction of LearningMechanism with Backprop:
-# - MappingProjection / LearningMechanism:
-# - Add learning_rate parameter to MappingProjection (if learnable is True)
-# - Refactor LearningMechanism to use MappingProjection learning_rate specification if present
-# - CHECK FOR EXISTING LM ASSERT IN pytests
-#
-# - AutodiffComposition:
-# - replace handling / flattening of nested compositions with Pytorch.add_module (which adds "child" modules)
-# - Check that error occurs for adding a controller to an AutodiffComposition
-# - Check that if "epochs" is not in input_dict for Autodiff, then:
-# - set to num_trials as default,
-# - leave it to override num_trials if specified (add this to DOCUMENTATION)
-# - Input construction has to be:
-# - same for Autodiff in Python mode and PyTorch mode
-# (NOTE: used to be that autodiff could get left in Python mode
-# so only where tests for Autodiff happened did it branch)
-# - AND different from Composition (in Python mode)
-# - support use of pathway argument in Autodff
-# - the following format doesn't work for LLVM (see test_identicalness_of_input_types:
-# xor = pnl.AutodiffComposition(nodes=[input_layer,hidden_layer,output_layer])
-# xor.add_projections([input_to_hidden_wts, hidden_to_output_wts])
-# - DOCUMENTATION: execution_mode=ExecutionMode.Python allowed
-# - Add warning of this on initial call to learn()
-#
-# - Composition:
-# - Add default_execution_mode attribute to allow nested Compositions to be executed in
-# different model than outer Composition
-# - _validate_input_shapes_and_expand_for_all_trials: consolidate with get_input_format()
-# - Generalize treatment of FEEDBACK specification:
- # - FIX: ADD TESTS FOR FEEDBACK TUPLE SPECIFICATION OF Projection, DIRECT SPECIFICATION IN CONSTRUCTOR
-# - FIX: why aren't FEEDBACK_SENDER and FEEDBACK_RECEIVER roles being assigned when feedback is specified?
-# - add property that keeps track of warnings that have been issued, and suppresses repeats if specified
-# - add property of Composition that lists it cycles
-# - Add warning if termination_condition is trigged (and verbosePref is set)
-# - Addition of projections to a ControlMechanism seems too dependent on the order in which the
-# the ControlMechanism is constructed with respect to its afferents (if it comes before one,
-# the projection to it (i.e., for monitoring) does not get added to the Composition
-# - - IMPLEMENTATION OF LEARNING: NEED ERROR IF TRY TO CALL LEARN ON A COMPOSITION THAT HAS NO LEARNING MECHANISMS
-# INCLUDING IN PYTHON MODE?? OR JUST ALLOW IT TO CONSTRUCT THE PATHWAY AUTOMATICALLY?
-# - Change size argument in constructor to use standard numpy shape format if tupe, and PNL format if list
-# - Write convenience Function for returning current time from context
-# - requires it be called from execution within aComposition, error otherwise)
-# - takes argument for time scale (e.g., TimeScale.TRIAL, TimeScale.RUN, etc.)
-# - Add TimeMechanism for which this is the function, and can be configured to report at a timescale
-# - Add Composition.run_status attribute assigned a context flag, with is_preparing property that checks it
-# (paralleling handling of is_initializing)
-# - Allow set of lists as specification for pathways in Composition
-# - Add support for set notation in add_backpropagation_learning_pathway (to match add_linear_processing_pathway)
-# see ScratchPad: COMPOSITION 2 INPUTS UNNESTED VERSION: MANY-TO-MANY
-# - Make sure that shadow inputs (see InputPort_Shadow_Inputs) uses the same matrix as shadowed input.
-# - composition.add_backpropagation_learning_pathway(): support use of set notation for multiple nodes that
-# project to a single one.
-# - add LearningProjections executed in EXECUTION_PHASE to self.projections
-# and then remove MODIFIED 8/1/23 in _check_for_unused_projections
-# - Why can't verbosePref be set directly on a composition?
-# - Composition.add_nodes():
-# - should check, on each call to add_node, to see if one that has a releavantprojection and, if so, add it.
-# - Allow [None] as argument and treat as []
-# - IF InputPort HAS default_input = DEFAULT_VARIABLE,
-# THEN IT SHOULD BE IGNORED AS AN INPUT NODE IN A COMPOSITION
-# - Add use of dict in pathways specification to map outputs from a set to inputs of another set
-# (including nested comps)
-#
-# - ShowGraph: (show_graph)
-# - don't show INPUT/OUTPUT Nodes for nested Comps in green/red
-# (as they don't really receive input or generate output on a run
-# - show feedback projections as pink (shouldn't that already be the case?)
-# - add mode for showing projections as diamonds without show_learning (e.g., "show_projections")
-# - figure out how to get storage_node to show without all other learning stuff
-# - show 'operation' parameter for LinearCombination in show_node_structure=ALL
-# - specify set of nodes to show and only show those
-# - fix: show_learning=ALL (or merge from EM branch)
-#
-# - ControlMechanism
-# - refactor ControlMechanism per notes of 11/3/21, including:
-# FIX: 11/3/21 - MOVE _parse_monitor_specs TO HERE FROM ObjectiveMechanism
-# - EpisodicMemoryMechanism:
-# - make storage_prob and retrieval_prob parameters linked to function
-# - make distance_field_weights a parameter linked to function
-#
-# - LinearCombination Function:
-# - finish adding derivative (for if exponents are specified)
-# - remove properties (use getter and setter for Parameters)
-#
-# - ContentAddressableMemory Function:
-# - rename "cue" -> "query"
-# - add field_weights as parameter of EM, and make it a shared_parameter ?as well as a function_parameter?
-
-# - DDM:
-# - make reset_stateful_function_when a Parameter and arg in constructor
-# and align with reset Parameter of IntegratorMechanism)
-#
-# - FIX: BUGS:
-# - composition:
-# - If any MappingProjection is specified from nested node to outer node,
-# then direct projections are instantiated to the output_CIM of the outer comp, and the
-# nested comp is treated as OUTPUT Node of outer comp even if all its projections are to nodes in outer comp
-# LOOK IN add_projections? for nested comps
-# - composition (?add_backpropagation_learning_pathway?):
-# THIS FAILS:
-# comp = Composition(name='a_outer')
-# comp.add_backpropagation_learning_pathway([input_1, hidden_1, output_1])
-# comp.add_backpropagation_learning_pathway([input_1, hidden_1, output_2])
-# BUT THE FOLLOWING WORKS (WITH IDENTICAL show_graph(show_learning=True)):
-# comp = Composition(name='a_outer')
-# comp.add_backpropagation_learning_pathway([input_1, hidden_1, output_1])
-# comp.add_backpropagation_learning_pathway([hidden_1, output_2])
-# - show_graph(): QUIRK (BUT NOT BUG?):
-# SHOWS TWO PROJECTIONS FROM a_inner.input_CIM -> hidden_x:
-# ?? BECAUSE hidden_x HAS TWO input_ports SINCE ITS FUNCTION IS LinearCombination?
-# a_inner = AutodiffComposition([hidden_x],name='a_inner')
-# a_outer = AutodiffComposition([[input_1, a_inner, output_1],
-# [a_inner, output_2]],
-# a_outer.show_graph(show_cim=True)
-
-# -LearningMechanism / Backpropagation LearningFunction:
-# - Construction of LearningMechanism on its own fails; e.g.:
-# lm = LearningMechanism(learning_rate=.01, learning_function=BackPropagation())
-# causes the following error:
-# TypeError("Logistic.derivative() missing 1 required positional argument: 'self'")
-# - Adding GatingMechanism after Mechanisms they gate fails to implement gating projections
-# (example: reverse order of the following in _construct_pathways
-# self.add_nodes(self.softmax_nodes)
-# self.add_nodes(self.field_weight_nodes)
-# - add Normalize as option
-# - Anytime a row's norm is 0, replace with 1s
-# - WHY IS Concatenate NOT WORKING AS FUNCTION OF AN INPUTPORT (WASN'T THAT USED IN CONTEXT OF BUFFER?
-# SEE NOTES TO KATHERINE
-#
-# - TESTS
-# For duplicate Projections (e.g., assign a Mechanism in **monitor** of ControlMechanism
-# and use comp.add_projection(MappingProjection(mointored, control_mech) -> should generate a duplicate
-# then search for other instances of the same error message
"""
-
Contents
--------
@@ -250,17 +16,18 @@
- `Organization `
- `Operation `
* `EMComposition_Creation`
- - `Fields `
+ - `Memory `
- `Capacity `
+ - `Fields `
- `Storage and Retrieval `
- `Learning `
* `EMComposition_Structure`
- `Input `
- - `Memory `
+ - `Memory `
- `Output `
* `EMComposition_Execution`
- `Processing `
- - `Learning `
+ - `Learning `
* `EMComposition_Examples`
- `Memory Template and Fill `
- `Field Weights `
@@ -271,27 +38,36 @@
Overview
--------
-The EMComposition implements a configurable, content-addressable form of episodic, or eternal memory, that emulates
+The EMComposition implements a configurable, content-addressable form of episodic (or external) memory. It emulates
an `EpisodicMemoryMechanism` -- reproducing all of the functionality of its `ContentAddressableMemory` `Function` --
-in the form of an `AutodiffComposition` that is capable of learning how to differentially weight different cues used
-for retrieval,, and that adds the capability for `memory_decay `. Its `memory
-` is configured using two arguments of its constructor: **memory_template** argument, that defines
-how each entry in `memory ` is structured (the number of fields in each entry and the length
-of each field); and **field_weights** argument, that defines which fields are used as cues for retrieval, i.e., "keys",
-including whether and how they are differentially weighted in the match process used for retrieval); and which
-fields are treated as "values" that are stored retrieved, but not used by the match process. The inputs to an
-EMComposition, corresponding to each key ("query") and value field are assigned to each of its `INPUT `
-`Nodes ` (listed in its `query_input_nodes ` and `value_input_nodes
-` attributes, respectively), and the retrieved values are represented as `OUTPUT
-` `Nodes ` of the EMComposition. The `memory ` can be
-accessed using its `memory ` attribute.
+in the form of an `AutodiffComposition`. This allows it to backpropagate error signals based on retrieved values to
+its inputs, and to learn how to differentially weight the cues (queries) used for retrieval. It also adds the
+capability for `memory_decay `. In these respects, it implements a variant of a `Modern
+Hopfield Network `_, as well as some of the features of a `Transformer
+`_.
+
+The `memory ` of an EMComposition is configured using two arguments of its constructor:
+the **memory_template** argument, that defines the overall structure of its `memory ` (the
+number of fields in each entry, the length of each field, and the number of entries); and the **fields** argument, that
+defines which fields are used as cues for retrieval (i.e., as "keys"), including whether and how they are weighted in
+the match process used for retrieval, which fields are treated as "values" that are stored and retrieved but not used by
+the match process, and which are involved in learning. The inputs to an EMComposition, corresponding to its keys and
+values, are assigned to each of its `INPUT ` `Nodes `: inputs to be matched to keys
+(i.e., used as "queries") are assigned to its `query_input_nodes `; and the remaining
+inputs assigned to its `value_input_nodes `. When the EMComposition is executed, the
+retrieved values for all fields are returned as the result, and recorded in its `results `
+attribute. The value for each field is assigned as the `value ` of its `OUTPUT `
+`Nodes `. The input is then stored in its `memory `, with a probability
+determined by its `storage_prob ` `Parameter`, and all previous memories decayed by its
+`memory_decay_rate `. The `memory ` can be accessed using its
+`memory ` Parameter.
.. technical_note::
- The memories of an EMComposition are actually stored in the `matrix ` attribute of a
- set of `MappingProjections ` (see `note below `). The `memory
- ` attribute compiles and formats these as a single 3d array, the rows of which (axis 0)
- are each entry, the columns of which (axis 1) are the fields of each entry, and the items of which (axis 2)
- are the values of each field (see `EMComposition_Memory` for additional details).
+ The memories of an EMComposition are actually stored in the `matrix ` `Parameter`
+ of a set of `MappingProjections ` (see `note below `). The
+ `memory ` Parameter compiles and formats these as a single 3d array, the rows of which
+ (axis 0) are each entry, the columns of which (axis 1) are the fields of each entry, and the items of which
+ (axis 2) are the values of each field (see `EMComposition_Memory_Configuration` for additional details).
.. _EMComposition_Organization:
@@ -302,14 +78,14 @@
*Entries and Fields*. Each entry in memory can have an arbitrary number of fields, and each field can have an arbitrary
length. However, all entries must have the same number of fields, and the corresponding fields must all have the same
length across entries. Each field is treated as a separate "channel" for storage and retrieval, and is associated with
-its own corresponding input (key or value) and output (retrieved value) `Node ` some or all of
+its own corresponding input (key or value) and output (retrieved value) `Node `, some or all of
which can be used to compute the similarity of the input (key) to entries in memory, that is used for retrieval.
Fields can be differentially weighted to determine the influence they have on retrieval, using the `field_weights
-` parameter (see `retrieval ` below). The number and
-shape of the fields in each entry is specified in the **memory_template** argument of the EMComposition's constructor
-(see `memory_template `). Which fields treated as keys (i.e., matched against queries during
-retrieval) and which are treated as values (i.e., retrieved but not used for matching retrieval) is specified in the
-**field_weights** argument of the EMComposition's constructor (see `field_weights `).
+` parameter (see `retrieval ` below). The number and shape
+of the fields in each entry is specified in the **memory_template** argument of the EMComposition's constructor (see
+`memory_template `). Which fields are treated as keys (i.e., matched against queries
+during retrieval) and which are treated as values (i.e., retrieved but not used for matching retrieval) is specified in
+the **field_weights** argument of the EMComposition's constructor (see `field_weights `).
.. _EMComposition_Operation:
@@ -317,39 +93,46 @@
*Retrieval.* The values retrieved from `memory ` (one for each field) are based
on the relative similarity of the keys to the entries in memory, computed as the distance of each key and the
-values in the corresponding field for each entry in memory. By default, normalized dot products (comparable to cosine
-similarity) are used to compute the similarity of each query to each key in memory. These distances are then
-weighted by the corresponding `field_weights ` for each field (if specified) and then
-summed, and the sum is softmaxed to produce a softmax distribution over the entries in memory. That is then used to
-generate a softmax-weighted average of the retrieved values across all fields, which is returned as the `result
-` of the EMComposition's `execution ` (an EMComposition can also be
-configured to return the entry with the lowest distance weighted by field, however then it is not compatible
-with learning; see `softmax_choice `).
+values in the corresponding field for each entry in memory. By default, for queries and keys that are vectors,
+normalized dot products (comparable to cosine similarity) are used to compute the similarity of each query to each
+key in memory; and if they are scalars the L0 norm is used. These distances are then weighted by the corresponding
+`field_weights ` for each field (if specified) and then summed, and the sum is softmaxed
+to produce a softmax distribution over the entries in memory. That is then used to generate a softmax-weighted average
+of the retrieved values across all fields, which is returned as the `result ` of the EMComposition's
+`execution ` (an EMComposition can also be configured to return the exact entry with the lowest
+distance (weighted by field); however, it is then not compatible with learning; see `softmax_choice
+`).
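+
+In rough numpy terms (an illustrative sketch only, not the actual implementation, which is carried out by the
+EMComposition's `Nodes `), retrieval based on a single key field resembles::
+
+    import numpy as np
+
+    def softmax(x, temperature=.1):
+        e = np.exp(x / temperature)
+        return e / e.sum()
+
+    memory_keys   = np.array([[1., 0.], [0., 1.]])   # key field of each of 2 entries
+    memory_values = np.array([[1., 1.], [2., 2.]])   # value field of each entry
+    query = np.array([.9, .1])
+
+    # normalized dot product of the query with each entry's key (comparable to cosine similarity)
+    sims = np.array([(query @ k) / ((np.linalg.norm(query) * np.linalg.norm(k)) or 1)
+                     for k in memory_keys])
+    p = softmax(sims)              # with multiple key fields, similarities are weighted by field and summed first
+    retrieved = p @ memory_values  # softmax-weighted average of the stored values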
COMMENT:
TBD DISTANCE ATTRIBUTES:
- The distances used for the last retrieval is stored in XXXX and the distances of each of their corresponding fields
+ The distance used for the last retrieval is stored in XXXX, and the distances of each of their corresponding fields
(weighted by `distance_field_weights `), are returned in XXX,
respectively.
COMMENT
-*Storage.* The `inputs ` to the EMComposition's fields are stored in `memory
-` after each execution, with a probability determined by `storage_prob
-`. If `memory_decay_rate ` is specified, then the `memory
-` is decayed by that amount after each execution. If `memory_capacity
-` has been reached, then each new memory replaces the weakest entry (i.e., the one
-with the smallest norm across all of its fields) in `memory `.
+*Storage.* The `inputs ` to the EMComposition's fields are stored
+in `memory ` after each execution, with a probability determined by `storage_prob
+`. If `memory_decay_rate ` is specified, then
+the `memory ` is decayed by that amount after each execution. If `memory_capacity
+` has been reached, then each new memory replaces the weakest entry
+(i.e., the one with the smallest norm across all of its fields) in `memory `.
.. _EMComposition_Creation:
Creation
--------
-An EMComposition is created by calling its constructor, that takes the following arguments:
+An EMComposition is created by calling its constructor. There are four major elements that can be configured:
+the structure of its `memory `; the `fields ` for the entries
+in memory; how `storage and retrieval ` operate; and whether and how `learning
+` is carried out.
+
+.. _EMComposition_Memory_Specification:
- .. _EMComposition_Fields:
+*Memory Specification*
+~~~~~~~~~~~~~~~~~~~~~~
-*Field Specification*
+These arguments are used to specify the shape and number of memory entries.
.. _EMComposition_Memory_Template:
@@ -394,18 +177,6 @@
zeros, and **memory_fill** is specified, then the matrix is filled with the value specified in **memory_fill**;
otherwise, zeros are used to fill all entries.
-.. _EMComposition_Memory_Capacity:
-
-*Memory Capacity*
-
-* **memory_capacity**: specifies the number of items that can be stored in the EMComposition's memory; when
- `memory_capacity ` is reached, each new entry overwrites the weakest entry (i.e., the
- one with the smallest norm across all of its fields) in `memory `. If `memory_template
- ` is specified as a 3-item tuple or 3d list or array (see above), then that is used
- to determine `memory_capacity ` (if it is specified and conflicts with either of those
- an error is generated). Otherwise, it can be specified using a numerical value, with a default of 1000. The
- `memory_capacity ` cannot be modified once the EMComposition has been constructed.
-
.. _EMComposition_Memory_Fill:
* **memory_fill**: specifies the value used to fill the `memory `, based on the shape specified
@@ -420,66 +191,130 @@
This can be ignored, as it does not affect the results of execution, but it can be averted by specifying
`memory_fill ` to use small random values (e.g., ``memory_fill=(0,.001)``).
+.. _EMComposition_Memory_Capacity:
+
+* **memory_capacity**: specifies the number of items that can be stored in the EMComposition's memory; when
+ `memory_capacity ` is reached, each new entry overwrites the weakest entry (i.e., the
+ one with the smallest norm across all of its fields) in `memory `. If `memory_template
+ ` is specified as a 3-item tuple or 3d list or array (see above), then that is used
+ to determine `memory_capacity ` (if it is specified and conflicts with either of those
+ an error is generated). Otherwise, it can be specified using a numerical value, with a default of 1000. The
+ `memory_capacity ` cannot be modified once the EMComposition has been constructed.
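+
+  For example, assuming the 2-item tuple form of **memory_template** (number of fields, length of each field),
+  the following sketch (shapes illustrative) creates a memory with 2 fields of length 5 that holds up to 500 entries::
+
+      em = EMComposition(memory_template=(2, 5), memory_capacity=500)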
+
+.. _EMComposition_Fields:
+
+*Fields*
+~~~~~~~~
+
+These arguments are used to specify the names of the fields in a memory entry, which fields are used as keys, how
+those are weighted for retrieval, and whether those weights are learned.
+
+.. _EMComposition_Field_Specification_Dict:
+
+* **fields**: a dict that specifies the names of the fields and their attributes. There must be an entry for each
+ field specified in the **memory_template**, and each entry must have the following format:
+
+ * *key*: a string that specifies the name of the field.
+
+ * *value*: a dict or tuple with three entries; if a dict, the key of each entry must be one of the keywords
+ specified below, and if a tuple, the entries must appear in the following order:
+
+ - *FIELD_WEIGHT* `specification ` - value must be a scalar or None. If it is a scalar,
+ the field is treated as a `retrieval key ` in `memory ` that
+ is weighted by that value during retrieval; if None, it is treated as a value in `memory `
+ and the field cannot be reconfigured later.
+
+ - *LEARN_FIELD_WEIGHT* `specification ` - value must be a boolean or a float;
+ if False, the field_weight for that field is not learned; if True, the field weight is learned using the
+ EMComposition's `learning_rate `; if a float, that is used as its learning_rate.
+
+ - *TARGET_FIELD* `specification ` - value must be a boolean; if True, the value of the
+ `retrieved_node ` for that field contributes to the error computed during learning
+ and backpropagated through the EMComposition (see `Backpropagation of `);
+ if False, the retrieved value for that field does not contribute to the error; however, its field_weight can still
+ be learned if that is specified in `learn_field_weight `.
+
+  .. note::
+ The **fields** argument is provided as a convenient and reliable way of specifying field attributes;
+ the dict itself is not retained as a `Parameter` or attribute of the EMComposition.
+
+ The specifications provided in the **fields** argument are assigned to the corresponding Parameters of
+ the EMComposition which, alternatively, can be specified individually using the **field_names**, **field_weights**,
+ **learn_field_weights** and **target_fields** arguments of the EMComposition's constructor, as described below.
+  However, these arguments and the **fields** argument cannot be used together; doing so raises an error.
+
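+  As a sketch of this format (the field names and values here are hypothetical, and it is assumed that the
+  *FIELD_WEIGHT*, *LEARN_FIELD_WEIGHT* and *TARGET_FIELD* keywords named above are importable from the top-level
+  namespace), the following specifies two keys and one value field::
+
+      import psyneulink as pnl
+      from psyneulink import FIELD_WEIGHT, LEARN_FIELD_WEIGHT, TARGET_FIELD
+
+      em = pnl.EMComposition(memory_template=(3, 5),
+                             fields={'KEY A': {FIELD_WEIGHT: .5,        # dict form
+                                               LEARN_FIELD_WEIGHT: True,
+                                               TARGET_FIELD: False},
+                                     'KEY B': (.5, False, False),       # tuple form, same order
+                                     'VALUE': (None, False, True)})
+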
+.. _EMComposition_Field_Names:
+
+* **field_names**: a list that specifies names to be assigned to the fields. The number of names specified must match
+  the number of fields specified in the memory_template. If specified, the names are used to label the nodes of the
+  EMComposition; otherwise, the fields are labeled generically as "Key 0", "Key 1", and "Value 1", "Value 2", etc.
+
.. _EMComposition_Field_Weights:
-* **field_weights**: specifies which fields are used as keys, and how they are weighted during retrieval. The
- number of entries specified must match the number of fields specified in **memory_template** (i.e., the size of
- of its first dimension (axis 0)). All non-zero entries must be positive; these designate *keys* -- fields
- that are used to match queries against entries in memory for retrieval (see `Match memories by field
- `). Entries of 0 designate *values* -- fields that are ignored during the matching
- process, but the values of which are retrieved and assigned as the `value ` of the
- corresponding `retrieved_node `. This distinction between keys and value corresponds
+* **field_weights**: specifies which fields are used as keys, and how they are weighted during retrieval. Fields
+  designated as keys are used to match inputs (queries) against entries in memory for retrieval (see `Match memories by
+  field `); entries designated as *values* are ignored during the matching process, but
+  their values in memory are retrieved and assigned as the `value ` of the corresponding
+  `retrieved_node `. This distinction between keys and values corresponds
to the format of a standard "dictionary," though in that case only a single key and value are allowed, whereas
- here there can be one or more keys and any number of values; if all fields are keys, this implements a full form of
- content-addressable memory. If **learn_field_weights** is True (and `enable_learning`
- is either True or a list with True for at least one entry), then the field_weights can be modified during training
- (this functions similarly to the attention head of a Transformer model, although at present the field can only be
- scalar values rather than vecdtors); if **learn_field_weights** is False, then the field_weights are fixed.
- The following options can be used to specify **field_weights**:
-
- * *None* (the default): all fields except the last are treated as keys, and are weighted equally for retrieval,
- while the last field is treated as a value field;
-
- * *single entry*: all fields are treated as keys (i.e., used for retrieval) and weighted equally for retrieval.
- if `normalize_field_weights ` is True, the value is ignored and all
- of keys are weighted by 1 / number of keys (i.e., normalized), whereas if `normalize_field_weights
- ` is False, then the value specified is used to weight the retrieval of
- every keys.
-
- * *multiple non-zero entries*: If all entries are identical, the value is ignored and the corresponding keys
- are weighted equally for retrieval; if the non-zero entries are non-identical, they are used to weight the
- corresponding fields during retrieval (see `Weight fields `). In either case,
- the remaining fields (with zero weights) are treated as value fields.
-
- _EMComposition_Field_Weights_Note:
+ in an EMComposition there can be one or more keys and any number of values; if all fields are keys, this implements a
+ full form of content-addressable memory. The following options can be used to specify **field_weights**:
+
+ * *None* (the default): all fields except the last are treated as keys, and are assigned a weight of 1,
+    while the last field is treated as a value field (the same as assigning it None in a list or tuple; see below).
+
+ * *scalar*: all fields are treated as keys (i.e., used for retrieval) and weighted equally for retrieval. If
+ `normalize_field_weights ` is True, the value is divided by the number
+ of keys, whereas if `normalize_field_weights ` is False, then the value
+ specified is used to weight the retrieval of all keys with that value.
+
+ .. note::
+      At present, the two settings have the same result, since the `SoftMax` function is used to normalize the match between
+ queries and keys. However, other retrieval functions could be used in the future that would be affected by
+ the value of the `field_weights `. Therefore, it is recommended to leave
+ `normalize_field_weights ` set to True (the default) to ensure that
+ the `field_weights ` are normalized to sum to 1.0.
+
+ * *list or tuple*: the number of entries must match the number of fields specified in **memory_template**, and
+ all entries must be either 0, a positive scalar value, or None. If all entries are identical, they are treated
+ as if a single value was specified (see above); if the entries are non-identical, any entries that are not None
+ are used to weight the corresponding fields during retrieval (see `Weight fields `),
+ including those that are 0 (though these will not be used in the retrieval process unless/until they are changed
+ to a positive value). If `normalize_field_weights ` is True, all non-None
+ entries are normalized so that they sum to 1.0; if False, the raw values are used to weight the retrieval of
+    the corresponding fields (see the sketch below). All entries of None are treated as value fields: they are not
+    assigned a `field_weight_node `, are ignored during retrieval, and *cannot be
+    modified* after the EMComposition has been constructed (see note below).
+
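+  For example, the following sketch (values are illustrative) treats the first two fields as keys weighted 3:1
+  and the last as a value field::
+
+      import psyneulink as pnl
+
+      # with normalize_field_weights=True (the default), the non-None entries
+      # are normalized to [.75, .25]; the None entry designates a value field
+      em = pnl.EMComposition(memory_template=(3, 5),
+                             field_weights=[3, 1, None])
+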
+ .. _EMComposition_Field_Weights_Change_Note:
+
.. note::
The field_weights can be modified after the EMComposition has been constructed, by assigning a new set of weights
to its `field_weights ` `Parameter`. However, only field_weights associated with
- key fields (i.e., were initially assigned non-zero field_weights) can be modified; the weights for value fields
- (i.e., ones that were initially assigned a field_weight of 0) cannot be modified, and an attempt to do so will
- generate an error. If a field initially used as a value may later need to be used as a key, it should be
- assigned a non-zero field_weight when the EMComposition is constructed; it can then be assigned 0 just after
- construction, and later changed as needed.
+    key fields (i.e., that were initially assigned a field_weight other than None) can be modified; the weights for
+    value fields (i.e., ones that were initially assigned a field_weight of None) cannot be modified, and attempting
+    to do so raises an error. If a field that will initially be used as a value may later need to be used as a key,
+    it should be assigned a `field_weight ` of 0 at construction (rather than None),
+    which can then be changed later as needed (as sketched below).
.. technical_note::
- The reason that only field_weights for keys can be modified is that only `field_weight_nodes
- ` for keys are constructed, since ones for values would have no effect on the
- retrieval process and thus are uncecessary.
+      The reason that field_weights can be modified only for keys is that `field_weight_nodes
+      ` are constructed only for keys, since ones for values would have no effect
+      on the retrieval process and therefore are unnecessary (and can be misleading).
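+
+    As a sketch of this reconfiguration pattern (using the `field_weights ` `Parameter`
+    as described above; the values are illustrative)::
+
+        import psyneulink as pnl
+
+        # construct with a weight of 0 (not None) so the second field can later serve as a key
+        em = pnl.EMComposition(memory_template=(2, 5), field_weights=[1, 0])
+
+        # later, promote the second field to an equally weighted key
+        em.parameters.field_weights.set([.5, .5])
+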
-.. _EMComposition_Normalize_Field_Weights:
-* **normalize_field_weights**: specifies whether the `field_weights ` are normalized
- or their raw values are used. If True, the `field_weights ` are normalized so that
- they sum to 1.0, and are used to weight (i.e., multiply) the corresponding fields during retrieval (see `Weight
- fields `). If False, the raw values of the `field_weights `
- are used to weight the retrieved value of each field. This setting is ignored if **field_weights**
- is None or `concatenate_queries ` is in effect.
+* **learn_field_weights**: if **enable_learning** is True, this specifies which field_weights are subject to learning,
+ and optionally the `learning_rate ` for each (see `learn_field_weights
+ ` below for details of specification).
-.. _EMComposition_Field_Names:
+.. _EMComposition_Normalize_Field_Weights:
-* **field_names**: specifies names that can be assigned to the fields. The number of names specified must
- match the number of fields specified in the memory_template. If specified, the names are used to label the
- nodes of the EMComposition. If not specified, the fields are labeled generically as "Key 0", "Key 1", etc..
+* **normalize_field_weights**: specifies whether the `field_weights ` are normalized or
+  their raw values are used. If True, the values of all non-None `field_weights ` are
+ normalized so that they sum to 1.0, and the normalized values are used to weight (i.e., multiply) the corresponding
+ fields during retrieval (see `Weight fields `). If False, the raw values of the
+ `field_weights ` are used to weight the retrieved value of each field. This setting
+ is ignored if **field_weights** is None or `concatenate_queries ` is True.
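+
+  For example, with ``field_weights=[3, 1, None]``, setting **normalize_field_weights** to True yields effective
+  weights of ``[.75, .25, None]``, whereas setting it to False leaves the raw values ``[3, 1, None]`` in place.
+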
.. _EMComposition_Concatenate_Queries:
@@ -503,27 +338,20 @@
are always preserved, even when `concatenate_queries ` is True, so that
separate inputs can be provided for each key, and the value of each key can be retrieved separately.
-.. _EMComposition_Memory_Decay_Rate
-
-* **memory_decay_rate**: specifies the rate at which items in the EMComposition's memory decay; the default rate
- is *AUTO*, which sets it to 1 / `memory_capacity `, such that the oldest memories
- are the most likely to be replaced when `memory_capacity ` is reached. If
- **memory_decay_rate** is set to 0 None or False, then memories do not decay and, when `memory_capacity
- ` is reached, the weakest memories are replaced, irrespective of order of entry.
-
.. _EMComposition_Retrieval_Storage:
*Retrieval and Storage*
+~~~~~~~~~~~~~~~~~~~~~~~
-* **storage_prob** : specifies the probability that the inputs to the EMComposition will be stored as an item in
+* **storage_prob**: specifies the probability that the inputs to the EMComposition will be stored as an item in
`memory ` on each execution.
-* **normalize_memories** : specifies whether queries and keys in memory are normalized before computing their dot
+* **normalize_memories**: specifies whether queries and keys in memory are normalized before computing their dot
products.
.. _EMComposition_Softmax_Gain:
-* **softmax_gain** : specifies the gain (inverse temperature) used for softmax normalizing the combined distances
+* **softmax_gain**: specifies the gain (inverse temperature) used for softmax normalizing the combined distances
used for retrieval (see `EMComposition_Execution` below). The following options can be used:
* numeric value: the value is used as the gain of the `SoftMax` Function for the EMComposition's
@@ -548,7 +376,7 @@
.. _EMComposition_Softmax_Choice:
-* **softmax_choice** : specifies how the `SoftMax` Function of the EMComposition's `softmax_node
+* **softmax_choice**: specifies how the `SoftMax` Function of the EMComposition's `softmax_node
` is used, with the combined distances, to generate a retrieved item;
the following are the options that can be used and the retrieved value they produce:
@@ -562,7 +390,7 @@
.. warning::
Use of the *ARG_MAX* and *PROBABILISTIC* options is not compatible with learning, as these implement a discrete
choice and thus are not differentiable. Constructing an EMComposition with **softmax_choice** set to either of
- these options and **enable_learning** set to True (or a list with any True entries) will generate a warning, and
+ these options and **learn_field_weights** set to True (or a list with any True entries) will generate a warning, and
calling the EMComposition's `learn ` method will generate an error; it must be changed to
*WEIGHTED_AVG* to execute learning.
@@ -571,37 +399,91 @@
passed as *ARG_MAX_INDICATOR*; and *PROBABILISTIC* is passed as *PROB_INDICATOR*; the other SoftMax options are
not currently supported.
+.. _EMComposition_Memory_Decay_Rate:
+
+* **memory_decay_rate**: specifies the rate at which items in the EMComposition's memory decay; the default rate
+ is *AUTO*, which sets it to 1 / `memory_capacity `, such that the oldest memories
+ are the most likely to be replaced when `memory_capacity ` is reached. If
+  **memory_decay_rate** is set to 0, None, or False, then memories do not decay and, when `memory_capacity
+ ` is reached, the weakest memories are replaced, irrespective of order of entry.
+
+.. _EMComposition_Purge_by_Weight:
+
+* **purge_by_field_weight**: specifies whether `field_weights ` are used in determining
+ which memory entry is replaced when a new memory is `stored `. If True, the norm of each
+ entry is multiplied by its `field_weight ` to determine which entry is the weakest and
+ will be replaced.
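+
+A sketch pulling these retrieval and storage options together (all values are illustrative)::
+
+    import psyneulink as pnl
+
+    em = pnl.EMComposition(memory_template=(2, 5),
+                           storage_prob=1.0,            # always store the current inputs
+                           normalize_memories=True,     # cosine-similarity-like matching
+                           softmax_gain=10.0,           # sharper (more argmax-like) retrieval
+                           memory_decay_rate=0,         # memories do not decay
+                           purge_by_field_weight=True)  # weight entry norms by field_weights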
+
.. _EMComposition_Learning:
*Learning*
+~~~~~~~~~~
-EMComposition supports two forms of learning -- error backpropagation and the learning of `field_weights
-` -- that can be configured by the following arguments of the EMComposition's constructor:
-
-* **enable_learning** : specifies whether learning is enabled for the EMComposition and, if so, which `retrieved_nodes
- ` are used to compute errors, and propagate these back through the network. If
- **enable_learning** is False, then no learning occurs, including of `field_weights `).
- If it is True, then all of the `retrieved_nodes ` participate in learning: For
- those that do not project to an outer Composition (i.e., one in which the EMComposition is `nested
- `), a `TARGET ` node is constructed for each, and used to compute errors that
- are backpropagated through the network to its `query_input_nodes ` and
- `value_input_nodes `, and on to any nodes that project to it from a composition
- in which the EMComposition is `nested `; retrieved_nodes that *do* project to an outer
- Composition receive their errors from those nodes, which are also backpropagated through the EMComposition.
- If **enable_learning** is a list, then only the `retrieved_nodes ` specified in the
- list participate in learning, and errors are computed only for those nodes. The list must contain the same
- number of entries as there are `fields ` and corresponding `retreived_nodes
- `, and each entry must be a boolean that specifies whether the corresponding
- `retrieved_node ` is used for learning.
-
-* **learn_field_weights** : specifies whether `field_weights ` are modifiable during
- learning (see `field_weights ` and `Learning ` for additional
- information. For learning of `field_weights ` to occur, **enable_learning** must
- also be True, or it must be a list with at least one True entry. If **learn_field_weights** is True,
- **use_gating_for_weighting** must be False (see `note `).
-
-* **learning_rate** : specifies the rate at which `field_weights ` are learned if
- **learn_field_weights** is True; see `Learning ` for additional information.
+EMComposition supports two forms of learning: error backpropagation through the entire Composition, and the learning
+of `field_weights ` within it. Learning is enabled by setting the **enable_learning**
+argument of the EMComposition's constructor to True, and optionally specifying the **learn_field_weights** argument
+(as detailed below). If **enable_learning** is False, no learning of any kind occurs; if it is True, then both forms
+of learning are enabled.
+
+.. _EMComposition_Error_BackPropagation:
+
+*Backpropagation of error*. If **enable_learning** is True, then the values retrieved from `memory
+` when the EMComposition is executed during learning can be used for error computation
+and backpropagation through the EMComposition to its inputs. By default, the values of all of its `retrieved_nodes
+` are included. For those that do not project to an outer Composition (i.e., one in
+which the EMComposition is `nested `), a `TARGET ` node is constructed
+for each, and used to compute errors that are backpropagated through the network to its `query_input_nodes
+` and `value_input_nodes `, and on to any
+nodes that project to those from a Composition within which the EMComposition is `nested `.
+Retrieved_nodes that *do* project to an outer Composition receive their errors from those nodes, which are also
+backpropagated through the EMComposition. Fields can be selectively specified for learning in the **fields** argument
+or the **target_fields** argument of the EMComposition's constructor, as detailed below.
+
+*Field Weight Learning*. If **enable_learning** is True, then the `field_weights ` can
+be learned by specifying these either in the **fields** argument or the **learn_field_weights** argument of the
+EMComposition's constructor, as detailed below. Learning field_weights implements a function comparable to the learning
+in an attention head of the `Transformer `_ architecture, although at present the
+field_weights can only be scalar values rather than vectors or matrices, and they cannot receive input. These capabilities will
+be added in the future.
+
+The following arguments of the EMComposition's constructor can be used to configure learning:
+
+* **enable_learning**: specifies whether any learning is enabled for the EMComposition. If False,
+  no learning occurs; if True, then both error backpropagation and learning of `field_weights
+ ` can occur. If **enable_learning** is True, **use_gating_for_weighting**
+ must be False (see `note `).
+
+.. _EMComposition_Target_Fields:
+
+* **target_fields**: specifies which `retrieved_nodes ` are used to compute
+  errors, and propagate these back through the EMComposition to its `query_input_nodes ` and
+  `value_input_nodes `. If this is None (the default), all `retrieved_nodes
+  ` are used; if it is a list or tuple, then it must have the same number of entries
+  as there are fields, and each entry must be a boolean specifying whether the corresponding `retrieved_node
+  ` participates in learning; errors are computed only for those nodes. This can
+ also be specified in a dict for the **fields** argument (see `fields `).
+
+.. _EMComposition_Field_Weights_Learning:
+
+* **learn_field_weights**: specifies which field_weights are subject to learning, and optionally the `learning_rate
+ ` for each; this can also be specified in a dict for the **fields** argument (see
+  `fields `). The following specifications can be used:
+
+ * *None*: all field_weights are subject to learning, and the `learning_rate ` for the
+ EMComposition is used as the learning_rate for all field_weights.
+
+ * *bool*: If True, all field_weights are subject to learning, and the `learning_rate `
+ for the EMComposition is used as the learning rate for all field_weights; if False, no field_weights are
+ subject to learning, regardless of `enable_learning `.
+
+ * *list* or *tuple*: must be the same length as the number of fields specified in the memory_template, and each entry
+ must be either True, False or a positive scalar value. If True, the corresponding field_weight is subject to
+ learning and the `learning_rate ` for the EMComposition is used to specify the
+    learning_rate for that field; if False, the corresponding field_weight is not subject to learning; if a scalar
+ value is specified, it is used as the `learning_rate` for that field.
+
+* **learning_rate**: specifies the learning_rate for any `field_weights ` for which a
+ learning_rate is not individually specified in the **learn_field_weights** argument (see above).
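+
+A sketch combining these learning arguments (values are illustrative): the first two field_weights are learned,
+the second with its own learning_rate, and only the last field is used as a target for backpropagated errors::
+
+    import psyneulink as pnl
+
+    em = pnl.EMComposition(memory_template=(3, 5),
+                           field_weights=[1, 1, None],
+                           enable_learning=True,
+                           learn_field_weights=[True, .01, False],
+                           target_fields=[False, False, True],
+                           learning_rate=.001)
+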
.. _EMComposition_Structure:
@@ -617,7 +499,7 @@
` of the EMComposition, listed in its `query_input_nodes `
and `value_input_nodes ` attributes, respectively,
-.. _EMComposition_Memory:
+.. _EMComposition_Memory_Structure:
*Memory*
~~~~~~~~
@@ -672,8 +554,8 @@
* **Input**. The inputs to the EMComposition are provided to the `query_input_nodes `
and `value_input_nodes `. The former are used for matching to the corresponding
- `fields ` of the `memory `, while the latter are retrieved but not used
- for matching.
+ `fields ` of the `memory `, while the latter are retrieved
+ but not used for matching.
* **Concatenation**. By default, the input to every `query_input_node ` is passed to
its own `match_node ` through a `MappingProjection` that computes its
@@ -700,9 +582,9 @@
(or the `concatenate_queries_node ` if `concatenate_queries
` attribute is True) are passed through a `MappingProjection` that
computes the distance between the corresponding input (query) and each memory (key) for the corresponding field,
- the result of which is possed to the corresponding `match_node `. By default, the
- distance is computed as the normalized dot product (i.e., between the normalized query vector and the normalized
- key for the corresponding `field `, that is comparable to using cosine similarity). However,
+  the result of which is passed to the corresponding `match_node `. By default, the distance
+  is computed as the normalized dot product (i.e., between the normalized query vector and the normalized key for the
+  corresponding `field `), which is comparable to using cosine similarity. However,
if `normalize_memories ` is set to False, just the raw dot product is computed.
The distance can also be customized by specifying a different `function ` for the
`MappingProjection` to the `match_node `. The result is assigned as the `value
@@ -751,9 +633,12 @@
`gain ` parameter; if None is specified, the default value of the `Softmax` Function is used as the
`gain ` parameter (see `Softmax_Gain ` for additional details).
+.. _EMComposition_Retreived_Values:
+
* **Retrieve values by field**. The vector of softmax weights for each memory generated by the `softmax_node
` is passed through the Projections to each of the `retrieved_nodes
- ` to compute the retrieved value for each field.
+ ` to compute the retrieved value for each field, which is assigned as the value
+ of the corresponding `retrieved_node `.
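+
+  In numpy terms, the retrieval steps described above amount to the following sketch (ignoring field weighting
+  and softmax thresholding, and assuming a single key field)::
+
+      import numpy as np
+
+      def retrieve(query, keys, values, gain=1.0):
+          # normalized dot product of the query with each key (comparable to cosine similarity)
+          q = query / np.linalg.norm(query)
+          k = keys / np.linalg.norm(keys, axis=1, keepdims=True)
+          match = k @ q                      # one match value per entry in memory
+          # softmax over entries, with gain as inverse temperature
+          w = np.exp(gain * match - np.max(gain * match))
+          w = w / w.sum()
+          # retrieved value = softmax-weighted average of the stored values
+          return w @ values
+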
* **Decay memories**. If `memory_decay ` is True, then each of the memories is decayed
by the amount specified in `memory_decay_rate `.
@@ -768,19 +653,19 @@
.. _EMComposition_Storage:
-* **Store memories**. After the values have been retrieved, the inputs to for each field (i.e., values in the
- `query_input_nodes ` and `value_input_nodes `)
- are added by the `storage_node ` as a new entry in `memory