diff --git a/doc/getting-started/quick_start.ipynb b/doc/getting-started/quick_start.ipynb index 1ddb6f5fce..ba8ac892bb 100644 --- a/doc/getting-started/quick_start.ipynb +++ b/doc/getting-started/quick_start.ipynb @@ -523,7 +523,7 @@ " color: #bbbbff;\n", "}\n", "\n", - "
{
  \"_comment\": \"that's all\",
  \"model\"model:
type: dict
: {
    \"type_map\"type_map:
type: typing.list[str], optional
A list of strings. Give the name to each type of atoms. It is noted that the number of atom type of training system must be less than 128 in a GPU environment. If not given, type.raw in each system should use the same type indexes, and type_map.raw will take no effect.
: [
     \"H\",
     \"C\"
    ],

    \"descriptor\"descriptor:
type: dict
The descriptor of atomic environment.
: {
      \"type\"type:
type: str
The type of the descriptor. See explanation below.
- loc_frame: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame.
- se_e2_a: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.
- se_e2_r: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.
- se_e3: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor.
- se_a_tpe: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Type embedding will be used by this descriptor.
- se_atten: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism will be used by this descriptor.
- se_atten_v2: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism with new modifications will be used by this descriptor.
- se_a_mask: Used by the smooth edition of Deep Potential. It can accept a variable number of atoms in a frame (Non-PBC system). aparam are required as an indicator matrix for the real/virtual sign of input atoms.
- hybrid: Concatenate of a list of descriptors as a new descriptor.
: \"se_e2_a\",
      \"sel\"sel:
type: str | typing.list[int], optional, default: auto
This parameter set the number of selected neighbors for each type of atom. It can be:
- list[int]. The length of the list should be the same as the number of atom types in the system. sel[i] gives the selected number of type-i neighbors. sel[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.
- str. Can be \"auto:factor\" or \"auto\". \"factor\" is a float number larger than 1. This option will automatically determine the sel. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the \"factor\". Finally the number is wraped up to 4 divisible. The option \"auto\" is equivalent to \"auto:1.1\".
: \"auto\",
      \"rcut_smth\"rcut_smth:
type: float, optional, default: 0.5
Where to start smoothing. For example the 1/r term is smoothed from rcut to rcut_smth
: 0.5,
      \"rcut\"rcut:
type: float, optional, default: 6.0
The cut-off radius.
: 6.0,
      \"neuron\"neuron:
type: typing.list[int], optional, default: [10, 20, 40]
Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.
: [
       25,
       50,
       100
      ],

      \"resnet_dt\"resnet_dt:
type: bool, optional, default: False
Whether to use a \"Timestep\" in the skip connection
: false,
      \"axis_neuron\"axis_neuron:
type: int, optional, default: 4, alias: n_axis_neuron
Size of the submatrix of G (embedding matrix).
: 16,
      \"seed\"seed:
type: NoneType | int, optional
Random seed for parameter initialization
: 1,
      \"_comment\": \" that's all\"
    },
    \"fitting_net\"fitting_net:
type: dict
The fitting of physical properties.
: {
      \"neuron\"neuron:
type: typing.list[int], optional, default: [120, 120, 120], alias: n_neuron
The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.
: [
       240,
       240,
       240
      ],

      \"resnet_dt\"resnet_dt:
type: bool, optional, default: True
Whether to use a \"Timestep\" in the skip connection
: true,
      \"seed\"seed:
type: NoneType | int, optional
Random seed for parameter initialization of the fitting net
: 1,
      \"_comment\": \" that's all\"
    },
    \"_comment\": \" that's all\"
  },
  \"learning_rate\"learning_rate:
type: dict, optional
The definition of learning rate
: {
    \"type\"type:
type: str, default: exp
The type of the learning rate.
: \"exp\",
    \"decay_steps\"decay_steps:
type: int, optional, default: 5000
The learning rate is decaying every this number of training steps.
: 50,
    \"start_lr\"start_lr:
type: float, optional, default: 0.001
The learning rate at the start of the training.
: 0.001,
    \"stop_lr\"stop_lr:
type: float, optional, default: 1e-08
The desired learning rate at the end of the training.
: 3.51e-08,
    \"_comment\": \"that's all\"
  },
  \"loss\"loss:
type: dict, optional
The definition of loss function. The loss type should be set to tensor, ener or left unset.
: {
    \"type\"type:
type: str, default: ener
The type of the loss. When the fitting type is ener, the loss type should be set to ener or left unset. When the fitting type is dipole or polar, the loss type should be set to tensor.
: \"ener\",
    \"start_pref_e\"start_pref_e:
type: float | int, optional, default: 0.02
The prefactor of energy loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the energy label should be provided by file energy.npy in each data system. If both start_pref_e and limit_pref_e are set to 0, then the energy will be ignored.
: 0.02,
    \"limit_pref_e\"limit_pref_e:
type: float | int, optional, default: 1.0
The prefactor of energy loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity.
: 1,
    \"start_pref_f\"start_pref_f:
type: float | int, optional, default: 1000
The prefactor of force loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the force label should be provided by file force.npy in each data system. If both start_pref_f and limit_pref_f are set to 0, then the force will be ignored.
: 1000,
    \"limit_pref_f\"limit_pref_f:
type: float | int, optional, default: 1.0
The prefactor of force loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity.
: 1,
    \"start_pref_v\"start_pref_v:
type: float | int, optional, default: 0.0
The prefactor of virial loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the virial label should be provided by file virial.npy in each data system. If both start_pref_v and limit_pref_v are set to 0, then the virial will be ignored.
: 0,
    \"limit_pref_v\"limit_pref_v:
type: float | int, optional, default: 0.0
The prefactor of virial loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity.
: 0,
    \"_comment\": \" that's all\"
  },
  \"training\"training:
type: dict
The training options.
: {
    \"training_data\"training_data:
type: dict, optional
Configurations of training data.
: {
      \"systems\"systems:
type: str | typing.list[str]
The data systems for training. This key can be provided with a list that specifies the systems, or be provided with a string by which the prefix of all systems are given and the list of the systems is automatically generated.
: [
       \"../00.data/training_data\"
      ],

      \"batch_size\"batch_size:
type: str | typing.list[int] | int, optional, default: auto
This key can be
- list: the length of which is the same as the systems _. The batch size of each system is given by the elements of the list.
- int: all systems _ use the same batch size.
- string \"auto\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.
- string \"auto:N\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.
- string \"mixed:N\": the batch data will be sampled from all systems and merged into a mixed system with the batch size N. Only support the se_atten descriptor.
If MPI is used, the value should be considered as the batch size per task.
: \"auto\",
      \"_comment\": \"that's all\"
    },
    \"validation_data\"validation_data:
type: NoneType | dict, optional, default: None
Configurations of validation data. Similar to that of training data, except that a numb_btch argument may be configured
: {
      \"systems\"systems:
type: str | typing.list[str]
The data systems for validation. This key can be provided with a list that specifies the systems, or be provided with a string by which the prefix of all systems are given and the list of the systems is automatically generated.
: [
       \"../00.data/validation_data\"
      ],

      \"batch_size\"batch_size:
type: str | typing.list[int] | int, optional, default: auto
This key can be
- list: the length of which is the same as the systems _. The batch size of each system is given by the elements of the list.
- int: all systems _ use the same batch size.
- string \"auto\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.
- string \"auto:N\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.
: \"auto\",
      \"numb_btch\"numb_btch:
type: int, optional, default: 1, alias: numb_batch
An integer that specifies the number of batches to be sampled for each validation period.
: 1,
      \"_comment\": \"that's all\"
    },
    \"numb_steps\"numb_steps:
type: int, alias: stop_batch
Number of training batch. Each training uses one batch of data.
: 10000,
    \"seed\"seed:
type: NoneType | int, optional
The random seed for getting frames from the training data set.
: 10,
    \"disp_file\"disp_file:
type: str, optional, default: lcurve.out
The file for printing learning curve.
: \"lcurve.out\",
    \"disp_freq\"disp_freq:
type: int, optional, default: 1000
The frequency of printing learning curve.
: 200,
    \"save_freq\"save_freq:
type: int, optional, default: 1000
The frequency of saving check point.
: 1000,
    \"_comment\": \"that's all\"
  }
}
" + "
{
  \"_comment\": \"that's all\",
  \"model\"model:
type: dict
: {
    \"type_map\"type_map:
type: typing.list[str], optional
A list of strings. Give the name to each type of atoms. It is noted that the number of atom types in the training system must be less than 128 in a GPU environment. If not given, type.raw in each system should use the same type indexes, and type_map.raw will have no effect.
: [
     \"H\",
     \"C\"
    ],

    \"descriptor\"descriptor:
type: dict
The descriptor of atomic environment.
: {
      \"type\"type:
type: str
The type of the descriptor. See explanation below.
- loc_frame: Defines a local frame at each atom, and computes the descriptor as local coordinates under this frame.
- se_e2_a: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.
- se_e2_r: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.
- se_e3: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor.
- se_a_tpe: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Type embedding will be used by this descriptor.
- se_atten: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism will be used by this descriptor.
- se_atten_v2: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism with new modifications will be used by this descriptor.
- se_a_mask: Used by the smooth edition of Deep Potential. It can accept a variable number of atoms in a frame (Non-PBC system). aparam are required as an indicator matrix for the real/virtual sign of input atoms.
- hybrid: Concatenation of a list of descriptors as a new descriptor.
: \"se_e2_a\",
      \"sel\"sel:
type: str | typing.list[int], optional, default: auto
This parameter sets the number of selected neighbors for each type of atom. It can be:
- list[int]. The length of the list should be the same as the number of atom types in the system. sel[i] gives the selected number of type-i neighbors. sel[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.
- str. Can be \"auto:factor\" or \"auto\". \"factor\" is a float number larger than 1. This option will automatically determine the sel. In detail, it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiplies the maximum by the \"factor\". Finally the number is rounded up to a multiple of 4. The option \"auto\" is equivalent to \"auto:1.1\".
: \"auto\",
      \"rcut_smth\"rcut_smth:
type: float, optional, default: 0.5
Where to start smoothing. For example the 1/r term is smoothed from rcut to rcut_smth
: 0.5,
      \"rcut\"rcut:
type: float, optional, default: 6.0
The cut-off radius.
: 6.0,
      \"neuron\"neuron:
type: typing.list[int], optional, default: [10, 20, 40]
Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.
: [
       25,
       50,
       100
      ],

      \"resnet_dt\"resnet_dt:
type: bool, optional, default: False
Whether to use a \"Timestep\" in the skip connection
: false,
      \"axis_neuron\"axis_neuron:
type: int, optional, default: 4, alias: n_axis_neuron
Size of the submatrix of G (embedding matrix).
: 16,
      \"seed\"seed:
type: NoneType | int, optional
Random seed for parameter initialization
: 1,
      \"_comment\": \" that's all\"
    },
    \"fitting_net\"fitting_net:
type: dict
The fitting of physical properties.
: {
      \"neuron\"neuron:
type: typing.list[int], optional, default: [120, 120, 120], alias: n_neuron
The number of neurons in each hidden layer of the fitting net. When two hidden layers are of the same size, a skip connection is built.
: [
       240,
       240,
       240
      ],

      \"resnet_dt\"resnet_dt:
type: bool, optional, default: True
Whether to use a \"Timestep\" in the skip connection
: true,
      \"seed\"seed:
type: NoneType | int, optional
Random seed for parameter initialization of the fitting net
: 1,
      \"_comment\": \" that's all\"
    },
    \"_comment\": \" that's all\"
  },
  \"learning_rate\"learning_rate:
type: dict, optional
The definition of learning rate
: {
    \"type\"type:
type: str, default: exp
The type of the learning rate.
: \"exp\",
    \"decay_steps\"decay_steps:
type: int, optional, default: 5000
The learning rate decays once every this number of training steps.
: 50,
    \"start_lr\"start_lr:
type: float, optional, default: 0.001
The learning rate at the start of the training.
: 0.001,
    \"stop_lr\"stop_lr:
type: float, optional, default: 1e-08
The desired learning rate at the end of the training.
: 3.51e-08,
    \"_comment\": \"that's all\"
  },
  \"loss\"loss:
type: dict, optional
The definition of loss function. The loss type should be set to tensor, ener or left unset.
: {
    \"type\"type:
type: str, default: ener
The type of the loss. When the fitting type is ener, the loss type should be set to ener or left unset. When the fitting type is dipole or polar, the loss type should be set to tensor.
: \"ener\",
    \"start_pref_e\"start_pref_e:
type: float | int, optional, default: 0.02
The prefactor of energy loss at the start of the training. Should be larger than or equal to 0. If set to a non-zero value, the energy label should be provided by file energy.npy in each data system. If both start_pref_e and limit_pref_e are set to 0, then the energy will be ignored.
: 0.02,
    \"limit_pref_e\"limit_pref_e:
type: float | int, optional, default: 1.0
The prefactor of energy loss at the limit of the training, i.e. as the training step goes to infinity. Should be larger than or equal to 0.
: 1,
    \"start_pref_f\"start_pref_f:
type: float | int, optional, default: 1000
The prefactor of force loss at the start of the training. Should be larger than or equal to 0. If set to a non-zero value, the force label should be provided by file force.npy in each data system. If both start_pref_f and limit_pref_f are set to 0, then the force will be ignored.
: 1000,
    \"limit_pref_f\"limit_pref_f:
type: float | int, optional, default: 1.0
The prefactor of force loss at the limit of the training, i.e. as the training step goes to infinity. Should be larger than or equal to 0.
: 1,
    \"start_pref_v\"start_pref_v:
type: float | int, optional, default: 0.0
The prefactor of virial loss at the start of the training. Should be larger than or equal to 0. If set to a non-zero value, the virial label should be provided by file virial.npy in each data system. If both start_pref_v and limit_pref_v are set to 0, then the virial will be ignored.
: 0,
    \"limit_pref_v\"limit_pref_v:
type: float | int, optional, default: 0.0
The prefactor of virial loss at the limit of the training, i.e. as the training step goes to infinity. Should be larger than or equal to 0.
: 0,
    \"_comment\": \" that's all\"
  },
  \"training\"training:
type: dict
The training options.
: {
    \"training_data\"training_data:
type: dict, optional
Configurations of training data.
: {
      \"systems\"systems:
type: str | typing.list[str]
The data systems for training. This key can be provided with a list that specifies the systems, or be provided with a string by which the prefix of all systems are given and the list of the systems is automatically generated.
: [
       \"../00.data/training_data\"
      ],

      \"batch_size\"batch_size:
type: str | typing.list[int] | int, optional, default: auto
This key can be
- list: the length of which is the same as the systems_. The batch size of each system is given by the elements of the list.
- int: all systems_ use the same batch size.
- string \"auto\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.
- string \"auto:N\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.
- string \"mixed:N\": the batch data will be sampled from all systems and merged into a mixed system with the batch size N. Only supported by the se_atten descriptor.
If MPI is used, the value should be considered as the batch size per task.
: \"auto\",
      \"_comment\": \"that's all\"
    },
    \"validation_data\"validation_data:
type: NoneType | dict, optional, default: None
Configurations of validation data. Similar to that of training data, except that a numb_btch argument may be configured
: {
      \"systems\"systems:
type: str | typing.list[str]
The data systems for validation. This key can be provided with a list that specifies the systems, or be provided with a string by which the prefix of all systems are given and the list of the systems is automatically generated.
: [
       \"../00.data/validation_data\"
      ],

      \"batch_size\"batch_size:
type: str | typing.list[int] | int, optional, default: auto
This key can be
- list: the length of which is the same as the systems_. The batch size of each system is given by the elements of the list.
- int: all systems_ use the same batch size.
- string \"auto\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.
- string \"auto:N\": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.
: \"auto\",
      \"numb_btch\"numb_btch:
type: int, optional, default: 1, alias: numb_batch
An integer that specifies the number of batches to be sampled for each validation period.
: 1,
      \"_comment\": \"that's all\"
    },
    \"numb_steps\"numb_steps:
type: int, alias: stop_batch
Number of training batches. Each training step uses one batch of data.
: 10000,
    \"seed\"seed:
type: NoneType | int, optional
The random seed for getting frames from the training data set.
: 10,
    \"disp_file\"disp_file:
type: str, optional, default: lcurve.out
The file for printing learning curve.
: \"lcurve.out\",
    \"disp_freq\"disp_freq:
type: int, optional, default: 1000
The frequency of printing learning curve.
: 200,
    \"save_freq\"save_freq:
type: int, optional, default: 1000
The frequency of saving checkpoints.
: 1000,
    \"_comment\": \"that's all\"
  }
}
" ], "text/plain": [ ""